从Linux5.9看Icmp处理流程。转载本文请联系编程杂技公众号。昨天有同学遇到发送udp包时收到目的地不可达icmp包的问题。本文简单介绍下linux5.9中icmp数据包的处理流程。发送icmp包的过程:下面以udp为例,看看什么时候发送目的不可达包。我们从收到一个udp包开始分析,具体函数是udp_rcv。intudp_rcv(structsk_buff*skb){return__udp4_lib_rcv(skb,&udp_table,IPPROTO_UDP);}int__udp4_lib_rcv(structsk_buff*skb,structudp_table*udptable,intproto){structsock*sk;structudphdr*uh;unsignedshortulen;structrtable*rt=skb_rtable(skb);__be32saddr,daddr;structnet*net=dev_net(skb->dev);boolrefcounted;//udp头uh=udp_hdr(skb);ulen=ntohs(uh->len);//源ip、目的ip地址saddr=ip_hdr(skb)->saddr;daddr=ip_hdr(skb)->daddr;//header指示的大小大于实际数据if(ulen>skb->len)gotoshort_packet;if(proto==IPPROTO_UDP){uh=udp_hdr(skb);}sk=skb_steal_sock(skb,&refcounted);//广播或多播if(rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))return__udp4_lib_mcast_deliver(net,skb,uh,saddr,daddr,udptable,proto);//单播,根据地址信息找到对应的socketsk=__udp4_lib_lookup_skb(skb,uh->source,uh->dest,udptable);//找到则挂到socketif(sk)returnudp_unicast_rcv_skb(sk,skb,uh);//找不到socket则回复一个ICMP_DEST_UNREACH的icmp包icmp_send(skb,ICMP_DEST_UNREACH,ICMP_PORT_UNREACH,0);kfree_skb(skb);return0;}我们看到,当通过ip包信息找不到对应的socket时,会向发送方发送一个icmp数据包。icmp数据包的结构如下。接收icmp包的处理流程:从接收ip包开始分析。intip_rcv(structsk_buff*skb,structnet_device*dev,structpacket_type*pt,structnet_device*orig_dev){structnet*net=dev_net(dev);skb=ip_rcv_core(skb,net);if(skb==NULL)returnNET_RX_DROP;returnNF_HOOK(NFPROTO_IPV4,NF_INET_PRE_ROUTING,net,NULL,skb,dev,NULL,ip_rcv_finish);}ip层收到包后会继续执行ip_rcv_finish。staticintip_rcv_finish(structnet*net,structsock*sk,structsk_buff*skb){structnet_device*dev=skb->dev;intret;ret=ip_rcv_finish_core(net,sk,skb,dev,NULL);if(ret!=NET_RX_DROP)ret=dst_input(skb);returnret;}然后执行dst_input。staticinlineintdst_input(structsk_buff*skb){returnskb_dst(skb)->input(skb);}input对应的是ip_local_deliver。intip_local_deliver(structsk_buff*skb){structnet*net=dev_net(skb->dev);returnNF_HOOK(NFPROTO_IPV4,NF_INET_LOCAL_IN,net,NULL,skb,skb->dev,NULL,ip_local_deliver_finish);}然后执行ip_local_deliver_finish。staticintip_local_deliver_finish(structnet*net,stru
ctsock*sk,structsk_buff*skb){__skb_pull(skb,skb_network_header_len(skb));rcu_read_lock();ip_protocol_deliver_rcu(net,skb,ip_hdr(skb)->protocol);rcu_read_unlock();return0;}ip_local_deliver_finish会执行ip_protocol_deliver_rcu做进一步的处理,ip_protocol_deliver_rcu的最后一个入参是ip包中的协议域(上层协议)。voidip_protocol_deliver_rcu(structnet*net,structsk_buff*skb,intprotocol){conststructnet_protocol*ipprot;intraw,ret;resubmit://根据协议找到对应的处理函数,这里是icmp协议ipprot=rcu_dereference(inet_protos[protocol]);if(ipprot){ret=INDIRECT_CALL_2(ipprot->handler,tcp_v4_rcv,udp_rcv,skb);if(ret<0){protocol=-ret;gotoresubmit;}__IP_INC_STATS(net,IPSTATS_MIB_INDELIVERS);}}INDIRECT_CALL_2是一个宏。#defineINDIRECT_CALL_1(f,f1,...)\({\likely(f==f1)?f1(__VA_ARGS__):f(__VA_ARGS__);\})#defineINDIRECT_CALL_2(f,f2,f1,...)\({\likely(f==f2)?f2(__VA_ARGS__):\INDIRECT_CALL_1(f,f1,__VA_ARGS__);\})因为这里的protocol是icmp协议,所以icmp对应的handler会被执行。那么它对应的是哪个函数呢?让我们看看inet_protos是什么。structnet_protocol__rcu*inet_protos[MAX_INET_PROTOS]__read_mostly;intinet_add_protocol(conststructnet_protocol*prot,unsignedcharprotocol){return!cmpxchg((conststructnet_protocol**)&inet_protos[protocol],NULL,prot)?0:-1;}我们看到inet_add_protocol函数用于注册协议和对应的处理函数。我们来看看这个函数是在哪里调用的。staticint__initinet_init(void){inet_add_protocol(&icmp_protocol,IPPROTO_ICMP);inet_add_protocol(&udp_protocol,IPPROTO_UDP);...}内核初始化的时候会注册一系列的协议和处理函数。我们来看看icmp的函数集。staticconststructnet_protocolicmp_protocol={.handler=icmp_rcv,.err_handler=icmp_err,.no_policy=1,.netns_ok=1,};我们看到handler是icmp_rcv。inticmp_rcv(structsk_buff*skb){structicmphdr*icmph;structrtable*rt=skb_rtable(skb);structnet*net=dev_net(rt->dst.dev);boolsuccess;//icmp头icmph=icmp_hdr(skb);success=icmp_pointers[icmph->type].handler(skb);}icmp_rcv根据icmp包的信息做进一步的处理。我们看一下icmp_pointers的定义。staticconststructicmp_controlicmp_pointers[NR_ICMP_TYPES+1]={...[ICMP_DEST_UNREACH]={.handler=icmp_unreach,.error=1,},};这里我们只关注ICMP_DEST_UNREACH的处理。staticboolicmp_unreach(structsk_buff*skb){...icmp_socket_deliver(skb,info);}继续看icmp_socket_deliver。staticvoidicmp_socket_deliver(structsk_buff*skb,u32i
nfo){conststructiphdr*iph=(conststructiphdr*)skb->data;conststructnet_protocol*ipprot;intprotocol=iph->protocol;//根据ip头的protocol字段找到对应的协议处理。这里的iph是触发错误的原始ip头,不是收到icmp包的ip头,所以protocol是udp协议ipprot=rcu_dereference(inet_protos[protocol]);if(ipprot&&ipprot->err_handler)ipprot->err_handler(skb,info);}然后执行udp的err_handler,即udp_err。intudp_err(structsk_buff*skb,u32info){return__udp4_lib_err(skb,info,&udp_table);}int__udp4_lib_err(structsk_buff*skb,u32info,structudp_table*udptable){structinet_sock*inet;conststructiphdr*iph=(conststructiphdr*)skb->data;structudphdr*uh=(structudphdr*)(skb->data+(iph->ihl<<2));constinttype=icmp_hdr(skb)->type;constintcode=icmp_hdr(skb)->code;booltunnel=false;structsock*sk;intharderr;interr;structnet*net=dev_net(skb->dev);//根据报文信息找到对应的socketsk=__udp4_lib_lookup(net,iph->daddr,uh->dest,iph->saddr,uh->source,skb->dev->ifindex,inet_sdif(skb),udptable,NULL);err=0;harderr=0;inet=inet_sk(sk);switch(type){caseICMP_DEST_UNREACH:err=EHOSTUNREACH;if(code<=NR_ICMP_UNREACH){harderr=icmp_err_convert[code].fatal;err=icmp_err_convert[code].errno;}break;...}//设置错误信息到socket中sk->sk_err=err;sk->sk_error_report(sk);return0;}sk_error_report是在初始化socket时设置的(见sock_init_data函数)。sk->sk_error_report=sock_def_error_report;接着看sock_def_error_report。staticvoidsock_def_error_report(structsock*sk){structsocket_wq*wq;rcu_read_lock();wq=rcu_dereference(sk->sk_wq);if(skwq_has_sleeper(wq))wake_up_interruptible_poll(&wq->wait,EPOLLERR);sk_wake_async(sk,SOCK_WAKE_IO,POLL_ERR);rcu_read_unlock();}staticinlinevoidsk_wake_async(conststructsock*sk,inthow,intband){if(sock_flag(sk,SOCK_FASYNC)){rcu_read_lock();sock_wake_async(rcu_dereference(sk->sk_wq),how,band);rcu_read_unlock();}}我们看到如果进程阻塞在socket上,它会被唤醒;如果设置了SOCK_FASYNC标志,则会通过信号通知进程。后记:本文简单介绍了icmp的生成和处理,以后有空再细化。
