1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
| /**
* ip_queue_xmit - IPv4数据包排队发送
* @sk: 套接字
* @skb: 要发送的数据包
* @fl: 流信息
*
* TCP/SCTP等协议通过此函数发送IPv4数据包
* 返回值:成功返回0,失败返回负错误码
*/
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
struct rtable *rt;
struct iphdr *iph;
int res;
/* 跳过目标不可达的已排队数据包 */
if (inet->pmtudisc == IP_PMTUDISC_PROBE &&
inet->cork.length) {
return ip_queue_mtu_discover(sk, skb);
}
rcu_read_lock();
inet_opt = rcu_dereference(inet->inet_opt);
fl4 = &fl->u.ip4;
rt = skb_rtable(skb);
if (rt)
goto packet_routed;
/* 如果没有路由,需要进行路由查找 */
rt = ip_route_output_ports(net, fl4, sk,
inet->inet_daddr, inet->inet_saddr,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
RT_CONN_FLAGS(sk),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
sk_setup_caps(sk, &rt->dst);
packet_routed:
skb_dst_set_noref(skb, &rt->dst);
/* 现在我们知道了路由,需要填充IP头部 */
skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
iph->frag_off = htons(IP_DF);
else
iph->frag_off = 0;
iph->ttl = ip_select_ttl(inet, &rt->dst);
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
/* 传输层必须设置以下字段:tot_len, id */
if (inet_opt && inet_opt->opt.optlen) {
iph->ihl += inet_opt->opt.optlen >> 2;
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
ip_select_ident_segs(net, skb, sk,
skb_shinfo(skb)->gso_segs ?: 1);
/* 添加IP头部校验和 */
ip_send_check(iph);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
res = ip_local_out(net, sk, skb);
rcu_read_unlock();
return res;
no_route:
rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
/**
 * ip_build_and_send_pkt - prepend a complete IPv4 header and send the packet
 * @skb: packet to send; the route already attached to it (skb_dst()) is
 *       consulted for the TTL and the don't-fragment decision
 * @sk: socket supplying the protocol number and the TTL/DF policy
 * @saddr: source address
 * @daddr: destination address
 * @opt: IP options to emit, or NULL
 * @tos: type-of-service value
 * @priority: value stored in skb->priority
 * @mark: value stored in skb->mark
 *
 * Return: the result of ip_local_out().
 */
int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt,
			  u8 tos, int priority, u32 mark)
{
	struct net *net = sock_net(sk);
	struct iphdr *iph;

	/* Make room for the fixed header plus any options, then build it. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = 5;
	iph->tos = tos;
	iph->ttl = ip_select_ttl(inet_sk(sk), skb_dst(skb));
	/* With a source-route option, opt.faddr replaces @daddr in the header. */
	iph->daddr = (opt && opt->opt.srr) ? opt->opt.faddr : daddr;
	iph->saddr = saddr;
	iph->protocol = sk->sk_protocol;
	if (ip_dont_fragment(sk, skb_dst(skb))) {
		iph->frag_off = htons(IP_DF);
		iph->id = 0;
	} else {
		iph->frag_off = 0;
		__ip_select_ident(net, iph, 1);
	}

	if (opt && opt->opt.optlen) {
		/* optlen is counted in bytes, ihl in 32-bit words. */
		iph->ihl += opt->opt.optlen>>2;
		ip_options_build(skb, &opt->opt, daddr, skb_rtable(skb), 0);
	}

	iph->tot_len = htons(skb->len);
	ip_send_check(iph);

	skb->priority = priority;
	skb->mark = mark;

	/*
	 * Send it out.
	 * NOTE(review): @sk is const here while ip_local_out() takes a
	 * non-const socket - confirm which socket pointer is meant to be
	 * passed.
	 */
	return ip_local_out(net, sk, skb);
}
/**
 * ip_send_check - recompute the IPv4 header checksum in place
 * @iph: IPv4 header to update
 *
 * The check field is cleared before the checksum is computed over the header,
 * then overwritten with the value ip_fast_csum() returns when given iph->ihl
 * as the header length.
 */
static inline void ip_send_check(struct iphdr *iph)
{
	unsigned char *hdr = (unsigned char *)iph;

	iph->check = 0;
	iph->check = ip_fast_csum(hdr, iph->ihl);
}
/**
 * ip_local_out - output a locally generated IPv4 packet
 * @net: network namespace
 * @sk: socket
 * @skb: packet
 *
 * Calls __ip_local_out() and, when that returns 1, continues with
 * dst_output().
 *
 * Return: the result of dst_output() if __ip_local_out() returned 1,
 * otherwise the value __ip_local_out() returned.
 */
int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int verdict = __ip_local_out(net, sk, skb);

	/* 1 is the expected case: the packet may continue to the output path. */
	if (likely(verdict == 1))
		return dst_output(net, sk, skb);

	return verdict;
}
/**
 * __ip_local_out - finalise the IPv4 header and run the LOCAL_OUT hook
 * @net: network namespace
 * @sk: socket
 * @skb: packet whose IPv4 header is already in place
 *
 * Sets tot_len from skb->len, refreshes the header checksum and passes the
 * packet to the NF_INET_LOCAL_OUT netfilter hook. The packet is not
 * transmitted here; a return value of 1 asks the caller to continue with
 * dst_output() itself.
 *
 * Return: 1 if the caller should continue with dst_output(); any other value
 * means the packet has already been dealt with.
 */
int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	iph->tot_len = htons(skb->len);
	ip_send_check(iph);

	/* Packets flagged IPSKB_REROUTED bypass the hook and go on to output. */
	if (IPCB(skb)->flags & IPSKB_REROUTED)
		return 1;

	/*
	 * nf_hook() only reports the verdict and leaves the call to
	 * dst_output() to the caller. A hook helper that invoked dst_output()
	 * itself would break the "1 means continue" contract and could lead
	 * to the packet being output twice.
	 *
	 * GSO and over-MTU packets are deliberately not special-cased here:
	 * they go through LOCAL_OUT like every other packet, and size handling
	 * is left to the output path.
	 */
	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
		       net, sk, skb, NULL, skb_dst(skb)->dev,
		       dst_output);
}
|