Thread (23 messages) 23 messages, 3 authors, 2003-12-10

Re: [ROUTE] PMTU only works on half the time

From: Julian Anastasov <ja@ssi.bg>
Date: 2003-12-03 22:03:20

	Hello,

On Tue, 2 Dec 2003, David S. Miller wrote:
Your changes mean that for routes with specific output interfaces,
we will ignore ICMPs for those routes that arrive on other interfaces
due to asymmetric routing.

Why don't you create a separate patch that just has the TOS masking
changes?  That's much less controversial and thus something I'm likely
to apply.
	Here is a new version. The changes are:

- ip_rt_redirect works for entries oif=IIF and oif=0 as before

- ip_rt_redirect now supports RTO_ONLINK

- ip_rt_frag_needed now supports RTO_ONLINK and all oif!=0

- ifindex is no longer a hash key. This is required for
ip_rt_frag_needed to deliver the event to all entries,
regardless of the oif key. I'm not sure whether this is good or
bad for the hash table distribution. I hope the Jenkins hash is
ready for this, as it is not a common case to have multiple
oifs per one saddr-daddr-tos.

- __ip_route_output_key now ignores illegal bits (bit 1) from tos

diff -Nru a/net/ipv4/route.c b/net/ipv4/route.c
--- a/net/ipv4/route.c	Wed Dec  3 23:37:00 2003
+++ b/net/ipv4/route.c	Wed Dec  3 23:37:00 2003
@@ -967,11 +967,11 @@
 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 		    u32 saddr, u8 tos, struct net_device *dev)
 {
-	int i, k;
+	int i, j;
 	struct in_device *in_dev = in_dev_get(dev);
 	struct rtable *rth, **rthp;
 	u32  skeys[2] = { saddr, 0 };
-	int  ikeys[2] = { dev->ifindex, 0 };
+	u8 toskeys[2];
 
 	tos &= IPTOS_RT_MASK;
 
@@ -992,11 +992,12 @@
 			goto reject_redirect;
 	}
 
+	toskeys[0] = tos;
+	toskeys[1] = tos | RTO_ONLINK;
+	if (saddr && daddr)
 	for (i = 0; i < 2; i++) {
-		for (k = 0; k < 2; k++) {
-			unsigned hash = rt_hash_code(daddr,
-						     skeys[i] ^ (ikeys[k] << 5),
-						     tos);
+		for (j = 0; j < 2; j++) {
+			unsigned hash = rt_hash_code(daddr, skeys[i], toskeys[j]);
 
 			rthp=&rt_hash_table[hash].chain;
 
@@ -1007,8 +1008,9 @@
 				smp_read_barrier_depends();
 				if (rth->fl.fl4_dst != daddr ||
 				    rth->fl.fl4_src != skeys[i] ||
-				    rth->fl.fl4_tos != tos ||
-				    rth->fl.oif != ikeys[k] ||
+				    rth->fl.fl4_tos != toskeys[j] ||
+				    (rth->fl.oif &&
+				     rth->fl.oif != dev->ifindex) ||
 				    rth->fl.iif != 0) {
 					rthp = &rth->u.rt_next;
 					continue;
@@ -1105,8 +1107,7 @@
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
 			   rt->u.dst.expires) {
 			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
-						     rt->fl.fl4_src ^
-							(rt->fl.oif << 5),
+						     rt->fl.fl4_src,
 						     rt->fl.fl4_tos);
 #if RT_CACHE_DEBUG >= 1
 			printk(KERN_DEBUG "ip_rt_advice: redirect to "
@@ -1239,19 +1240,21 @@
 
 unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
 {
-	int i;
+	int i, j;
 	unsigned short old_mtu = ntohs(iph->tot_len);
 	struct rtable *rth;
 	u32  skeys[2] = { iph->saddr, 0, };
 	u32  daddr = iph->daddr;
 	u8   tos = iph->tos & IPTOS_RT_MASK;
 	unsigned short est_mtu = 0;
+	u8 toskeys[2] = { tos, tos | RTO_ONLINK };
 
 	if (ipv4_config.no_pmtu_disc)
 		return 0;
 
+	for (j = 0; j < 2; j++)
 	for (i = 0; i < 2; i++) {
-		unsigned hash = rt_hash_code(daddr, skeys[i], tos);
+		unsigned hash = rt_hash_code(daddr, skeys[i], toskeys[j]);
 
 		rcu_read_lock();
 		for (rth = rt_hash_table[hash].chain; rth;
@@ -1261,7 +1264,7 @@
 			    rth->fl.fl4_src == skeys[i] &&
 			    rth->rt_dst  == daddr &&
 			    rth->rt_src  == iph->saddr &&
-			    rth->fl.fl4_tos == tos &&
+			    rth->fl.fl4_tos == toskeys[j] &&
 			    rth->fl.iif == 0 &&
 			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
 				unsigned short mtu = new_mtu;
@@ -1503,7 +1506,7 @@
 	RT_CACHE_STAT_INC(in_slow_mc);
 
 	in_dev_put(in_dev);
-	hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
+	hash = rt_hash_code(daddr, saddr, tos);
 	return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
 
 e_nobufs:
@@ -1554,7 +1557,7 @@
 	if (!in_dev)
 		goto out;
 
-	hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
+	hash = rt_hash_code(daddr, saddr, tos);
 
 	/* Check for the most weird martians, which can be not detected
 	   by fib_lookup.
@@ -1847,7 +1850,7 @@
 	int iif = dev->ifindex;
 
 	tos &= IPTOS_RT_MASK;
-	hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
+	hash = rt_hash_code(daddr, saddr, tos);
 
 	rcu_read_lock();
 	for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
@@ -2190,7 +2193,7 @@
 
 	rth->rt_flags = flags;
 
-	hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos);
+	hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src, tos);
 	err = rt_intern_hash(hash, rth, rp);
 done:
 	if (free_res)
@@ -2213,8 +2216,9 @@
 {
 	unsigned hash;
 	struct rtable *rth;
+	u8 tos = flp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK);
 
-	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
+	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src, tos);
 
 	rcu_read_lock();
 	for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
@@ -2226,8 +2230,7 @@
 #ifdef CONFIG_IP_ROUTE_FWMARK
 		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
 #endif
-		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
-			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+		    rth->fl.fl4_tos == tos) {
 			rth->u.dst.lastuse = jiffies;
 			dst_hold(&rth->u.dst);
 			rth->u.dst.__use++;
Regards

--
Julian Anastasov [off-list ref]
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help