1/* Kernel communication using routing socket.
2 * Copyright (C) 1999 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Zebra; see the file COPYING.  If not, write to the Free
18 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22#include <zebra.h>
23
24#include "if.h"
25#include "prefix.h"
26#include "sockunion.h"
27#include "connected.h"
28#include "memory.h"
29#include "ioctl.h"
30#include "log.h"
31#include "str.h"
32#include "table.h"
33#include "rib.h"
34#include "privs.h"
35
36#include "zebra/interface.h"
37#include "zebra/zserv.h"
38#include "zebra/debug.h"
39#include "zebra/kernel_socket.h"
40
41extern struct zebra_privs_t zserv_privs;
42extern struct zebra_t zebrad;
43
44/*
45 * Historically, the BSD routing socket has aligned data following a
46 * struct sockaddr to sizeof(long), which was 4 bytes on some
47 * platforms, and 8 bytes on others.  NetBSD 6 changed the routing
48 * socket to align to sizeof(uint64_t), which is 8 bytes.  OS X
49 * appears to align to sizeof(int), which is 4 bytes.
50 *
51 * Alignment of zero-sized sockaddrs is nonsensical, but historically
52 * BSD defines RT_ROUNDUP(0) to be the alignment interval (rather than
53 * 0).  We follow this practice without questioning it, but it is a
54 * bug if quagga calls ROUNDUP with 0.
55 */
56
57/*
58 * Because of these varying conventions, the only sane approach is for
59 * the <net/route.h> header to define some flavor of ROUNDUP macro.
60 */
#if defined(RT_ROUNDUP)
/* The platform header states its own padding rule: defer to it. */
#define ROUNDUP(a)	RT_ROUNDUP(a)
#endif /* defined(RT_ROUNDUP) */

/*
 * If ROUNDUP has not yet been defined in terms of platform-provided
 * defines, attempt to cope with heuristics.
 */
#if !defined(ROUNDUP)

/*
 * It's a bug for a platform not to define rounding/alignment for
 * sockaddrs on the routing socket.  This warning really is
 * intentional, to provoke filing bug reports with operating systems
 * that don't define RT_ROUNDUP or equivalent.
 */
#warning "net/route.h does not define RT_ROUNDUP; making unwarranted assumptions!"

/*
 * OS X (Xcode as of 2014-12) is known not to define RT_ROUNDUP, and it
 * appears to align to sizeof(int).  Every other platform that ends up
 * here is assumed to follow the historical BSD convention of aligning
 * to sizeof(long).
 */
#ifdef __APPLE__
#define ROUNDUP_TYPE	int
#else
#define ROUNDUP_TYPE	long
#endif

/*
 * Round (a) up to the next multiple of sizeof(ROUNDUP_TYPE).  A value
 * that is not greater than zero yields one full alignment interval,
 * mirroring the historical BSD definition.
 */
#define ROUNDUP(a) \
  ((a) > 0 ? (1 + (((a) - 1) | (sizeof(ROUNDUP_TYPE) - 1))) : sizeof(ROUNDUP_TYPE))

#endif /* !defined(ROUNDUP) */
90
/*
 * SAROUNDUP(X): given a pointer (sockaddr or void *), the number of
 * bytes taken up by that sockaddr plus any padding needed for
 * alignment.
 *
 * When struct sockaddr has sa_len, that length is rounded up.
 * Otherwise the size is chosen from sa_family: AF_INET, AF_INET6 (only
 * with HAVE_IPV6) and AF_LINK use the rounded size of their fixed
 * structure, and any other family is taken to be sizeof(struct
 * sockaddr), unrounded.
 */
#if defined(HAVE_STRUCT_SOCKADDR_SA_LEN)

#define SAROUNDUP(X) \
  ROUNDUP (((struct sockaddr *)(X))->sa_len)

#elif defined(HAVE_IPV6)

/* One would hope fixed-size structures are aligned; round up anyway. */
#define SAROUNDUP(X) \
  (((struct sockaddr *)(X))->sa_family == AF_INET \
     ? ROUNDUP (sizeof (struct sockaddr_in)) \
   : ((struct sockaddr *)(X))->sa_family == AF_INET6 \
     ? ROUNDUP (sizeof (struct sockaddr_in6)) \
   : ((struct sockaddr *)(X))->sa_family == AF_LINK \
     ? ROUNDUP (sizeof (struct sockaddr_dl)) \
   : sizeof (struct sockaddr))

#else /* neither sa_len nor IPv6 */

#define SAROUNDUP(X) \
  (((struct sockaddr *)(X))->sa_family == AF_INET \
     ? ROUNDUP (sizeof (struct sockaddr_in)) \
   : ((struct sockaddr *)(X))->sa_family == AF_LINK \
     ? ROUNDUP (sizeof (struct sockaddr_dl)) \
   : sizeof (struct sockaddr))

#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */
116
117/*
118 * We use a call to an inline function to copy (PNT) to (DEST)
119 * 1. Calculating the length of the copy requires an #ifdef to determine
120 *    if sa_len is a field and can't be used directly inside a #define
121 * 2. So the compiler doesn't complain when DEST is NULL, which is only true
122 *    when we are skipping the copy and incrementing to the next SA
123 */
124static void inline
125rta_copy (union sockunion *dest, caddr_t src) {
126  int len;
127#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
128  len = (((struct sockaddr *)src)->sa_len > sizeof (*dest)) ?
129            sizeof (*dest) : ((struct sockaddr *)src)->sa_len ;
130#else
131  len = (SAROUNDUP (src) > sizeof (*dest)) ?
132            sizeof (*dest) : SAROUNDUP (src) ;
133#endif
134  memcpy (dest, src, len);
135}
136
/*
 * Helpers for walking the sockaddrs that follow a routing-socket
 * message header.  Each one acts only when bit RTA is set in RTMADDRS,
 * and in that case always advances PNT by the padded size of the
 * sockaddr it points at.  PNT must be a modifiable byte pointer.
 *
 * Every macro is wrapped in do { } while (0) so that it expands to
 * exactly one statement and is safe inside unbraced if/else bodies.
 *
 * RTA_ADDR_GET: copy the sockaddr into DEST (a union sockunion *) when
 * DEST is not NULL and the address family passes af_check().
 */
#define RTA_ADDR_GET(DEST, RTA, RTMADDRS, PNT) \
  do \
    { \
      if ((RTMADDRS) & (RTA)) \
        { \
          int len = SAROUNDUP ((PNT)); \
          if ( ((DEST) != NULL) && \
               af_check (((struct sockaddr *)(PNT))->sa_family)) \
            rta_copy((DEST), (PNT)); \
          (PNT) += len; \
        } \
    } \
  while (0)

/*
 * RTA_ATTR_GET: like RTA_ADDR_GET but without the address-family
 * check; the copy happens whenever DEST is not NULL.
 */
#define RTA_ATTR_GET(DEST, RTA, RTMADDRS, PNT) \
  do \
    { \
      if ((RTMADDRS) & (RTA)) \
        { \
          int len = SAROUNDUP ((PNT)); \
          if ((DEST) != NULL) \
            rta_copy((DEST), (PNT)); \
          (PNT) += len; \
        } \
    } \
  while (0)

/*
 * RTA_NAME_GET: treat the sockaddr as a struct sockaddr_dl and extract
 * the interface name.  When DEST is not NULL, the family is AF_LINK,
 * and sdl_nlen is below IFNAMSIZ and not larger than the padded
 * sockaddr size, the name is copied to DEST, NUL-terminated, and LEN
 * is set to its length.  If the RTA bit is not set at all, LEN is set
 * to 0.  If the bit is set but the checks fail, LEN is left untouched,
 * so callers should initialise it before use.
 */
#define RTA_NAME_GET(DEST, RTA, RTMADDRS, PNT, LEN) \
  do \
    { \
      if ((RTMADDRS) & (RTA)) \
        { \
          u_char *pdest = (u_char *) (DEST); \
          int len = SAROUNDUP ((PNT)); \
          struct sockaddr_dl *sdl = (struct sockaddr_dl *)(PNT); \
          if (IS_ZEBRA_DEBUG_KERNEL) \
            zlog_debug ("%s: RTA_SDL_GET nlen %d, alen %d", \
                        __func__, sdl->sdl_nlen, sdl->sdl_alen); \
          if ( ((DEST) != NULL) && (sdl->sdl_family == AF_LINK) \
               && (sdl->sdl_nlen < IFNAMSIZ) && (sdl->sdl_nlen <= len) ) \
            { \
              memcpy (pdest, sdl->sdl_data, sdl->sdl_nlen); \
              pdest[sdl->sdl_nlen] = '\0'; \
              (LEN) = sdl->sdl_nlen; \
            } \
          (PNT) += len; \
        } \
      else \
        { \
          (LEN) = 0; \
        } \
    } \
  while (0)

177/* Routing socket message types. */
178const struct message rtm_type_str[] =
179{
180  {RTM_ADD,      "RTM_ADD"},
181  {RTM_DELETE,   "RTM_DELETE"},
182  {RTM_CHANGE,   "RTM_CHANGE"},
183  {RTM_GET,      "RTM_GET"},
184  {RTM_LOSING,   "RTM_LOSING"},
185  {RTM_REDIRECT, "RTM_REDIRECT"},
186  {RTM_MISS,     "RTM_MISS"},
187  {RTM_LOCK,     "RTM_LOCK"},
188#ifdef OLDADD
189  {RTM_OLDADD,   "RTM_OLDADD"},
190#endif /* RTM_OLDADD */
191#ifdef RTM_OLDDEL
192  {RTM_OLDDEL,   "RTM_OLDDEL"},
193#endif /* RTM_OLDDEL */
194  {RTM_RESOLVE,  "RTM_RESOLVE"},
195  {RTM_NEWADDR,  "RTM_NEWADDR"},
196  {RTM_DELADDR,  "RTM_DELADDR"},
197  {RTM_IFINFO,   "RTM_IFINFO"},
198#ifdef RTM_OIFINFO
199  {RTM_OIFINFO,   "RTM_OIFINFO"},
200#endif /* RTM_OIFINFO */
201#ifdef RTM_NEWMADDR
202  {RTM_NEWMADDR, "RTM_NEWMADDR"},
203#endif /* RTM_NEWMADDR */
204#ifdef RTM_DELMADDR
205  {RTM_DELMADDR, "RTM_DELMADDR"},
206#endif /* RTM_DELMADDR */
207#ifdef RTM_IFANNOUNCE
208  {RTM_IFANNOUNCE, "RTM_IFANNOUNCE"},
209#endif /* RTM_IFANNOUNCE */
210  {0,            NULL}
211};
212
213static const struct message rtm_flag_str[] =
214{
215  {RTF_UP,        "UP"},
216  {RTF_GATEWAY,   "GATEWAY"},
217  {RTF_HOST,      "HOST"},
218  {RTF_REJECT,    "REJECT"},
219  {RTF_DYNAMIC,   "DYNAMIC"},
220  {RTF_MODIFIED,  "MODIFIED"},
221  {RTF_DONE,      "DONE"},
222#ifdef RTF_MASK
223  {RTF_MASK,      "MASK"},
224#endif /* RTF_MASK */
225#ifdef RTF_CLONING
226  {RTF_CLONING,   "CLONING"},
227#endif /* RTF_CLONING */
228  {RTF_XRESOLVE,  "XRESOLVE"},
229  {RTF_LLINFO,    "LLINFO"},
230  {RTF_STATIC,    "STATIC"},
231  {RTF_BLACKHOLE, "BLACKHOLE"},
232#ifdef RTF_PRIVATE
233  {RTF_PRIVATE,	  "PRIVATE"},
234#endif /* RTF_PRIVATE */
235  {RTF_PROTO1,    "PROTO1"},
236  {RTF_PROTO2,    "PROTO2"},
237#ifdef RTF_PRCLONING
238  {RTF_PRCLONING, "PRCLONING"},
239#endif /* RTF_PRCLONING */
240#ifdef RTF_WASCLONED
241  {RTF_WASCLONED, "WASCLONED"},
242#endif /* RTF_WASCLONED */
243#ifdef RTF_PROTO3
244  {RTF_PROTO3,    "PROTO3"},
245#endif /* RTF_PROTO3 */
246#ifdef RTF_PINNED
247  {RTF_PINNED,    "PINNED"},
248#endif /* RTF_PINNED */
249#ifdef RTF_LOCAL
250  {RTF_LOCAL,    "LOCAL"},
251#endif /* RTF_LOCAL */
252#ifdef RTF_BROADCAST
253  {RTF_BROADCAST, "BROADCAST"},
254#endif /* RTF_BROADCAST */
255#ifdef RTF_MULTICAST
256  {RTF_MULTICAST, "MULTICAST"},
257#endif /* RTF_MULTICAST */
258#ifdef RTF_MULTIRT
259  {RTF_MULTIRT,   "MULTIRT"},
260#endif /* RTF_MULTIRT */
261#ifdef RTF_SETSRC
262  {RTF_SETSRC,    "SETSRC"},
263#endif /* RTF_SETSRC */
264  {0,             NULL}
265};
266
267/* Kernel routing update socket. */
268int routing_sock = -1;
269
270/* Yes I'm checking ugly routing socket behavior. */
271/* #define DEBUG */
272
/*
 * Supported address family check.
 *
 * Returns 1 for AF_INET, and for AF_INET6 when built with HAVE_IPV6;
 * returns 0 for every other family.
 */
static int inline
af_check (int family)
{
  switch (family)
    {
    case AF_INET:
#ifdef HAVE_IPV6
    case AF_INET6:
#endif /* HAVE_IPV6 */
      return 1;
    default:
      return 0;
    }
}
285
286/* Dump routing table flag for debug purpose. */
287static void
288rtm_flag_dump (int flag)
289{
290  const struct message *mes;
291  static char buf[BUFSIZ];
292
293  buf[0] = '\0';
294  for (mes = rtm_flag_str; mes->key != 0; mes++)
295    {
296      if (mes->key & flag)
297	{
298	  strlcat (buf, mes->str, BUFSIZ);
299	  strlcat (buf, " ", BUFSIZ);
300	}
301    }
302  zlog_debug ("Kernel: %s", buf);
303}
304
305#ifdef RTM_IFANNOUNCE
306/* Interface adding function */
307static int
308ifan_read (struct if_announcemsghdr *ifan)
309{
310  struct interface *ifp;
311
312  ifp = if_lookup_by_index (ifan->ifan_index);
313
314  if (ifp)
315    assert ( (ifp->ifindex == ifan->ifan_index)
316             || (ifp->ifindex == IFINDEX_INTERNAL) );
317
318  if ( (ifp == NULL)
319      || ((ifp->ifindex == IFINDEX_INTERNAL)
320          && (ifan->ifan_what == IFAN_ARRIVAL)) )
321    {
322      if (IS_ZEBRA_DEBUG_KERNEL)
323        zlog_debug ("%s: creating interface for ifindex %d, name %s",
324                    __func__, ifan->ifan_index, ifan->ifan_name);
325
326      /* Create Interface */
327      ifp = if_get_by_name_len(ifan->ifan_name,
328			       strnlen(ifan->ifan_name,
329				       sizeof(ifan->ifan_name)));
330      ifp->ifindex = ifan->ifan_index;
331
332      if_get_metric (ifp);
333      if_add_update (ifp);
334    }
335  else if (ifp != NULL && ifan->ifan_what == IFAN_DEPARTURE)
336    if_delete_update (ifp);
337
338  if_get_flags (ifp);
339  if_get_mtu (ifp);
340  if_get_metric (ifp);
341
342  if (IS_ZEBRA_DEBUG_KERNEL)
343    zlog_debug ("%s: interface %s index %d",
344                __func__, ifan->ifan_name, ifan->ifan_index);
345
346  return 0;
347}
348#endif /* RTM_IFANNOUNCE */
349
350#ifdef HAVE_BSD_IFI_LINK_STATE
351/* BSD link detect translation */
352static void
353bsd_linkdetect_translate (struct if_msghdr *ifm)
354{
355  if ((ifm->ifm_data.ifi_link_state >= LINK_STATE_UP) ||
356      (ifm->ifm_data.ifi_link_state == LINK_STATE_UNKNOWN))
357    SET_FLAG(ifm->ifm_flags, IFF_RUNNING);
358  else
359    UNSET_FLAG(ifm->ifm_flags, IFF_RUNNING);
360}
361#endif /* HAVE_BSD_IFI_LINK_STATE */
362
363/*
364 * Handle struct if_msghdr obtained from reading routing socket or
365 * sysctl (from interface_list).  There may or may not be sockaddrs
366 * present after the header.
367 */
368int
369ifm_read (struct if_msghdr *ifm)
370{
371  struct interface *ifp = NULL;
372  struct sockaddr_dl *sdl;
373  char ifname[IFNAMSIZ];
374  short ifnlen = 0;
375  caddr_t cp;
376
377  /* terminate ifname at head (for strnlen) and tail (for safety) */
378  ifname[IFNAMSIZ - 1] = '\0';
379
380  /* paranoia: sanity check structure */
381  if (ifm->ifm_msglen < sizeof(struct if_msghdr))
382    {
383      zlog_err ("ifm_read: ifm->ifm_msglen %d too short\n",
384		ifm->ifm_msglen);
385      return -1;
386    }
387
388  /*
389   * Check for a sockaddr_dl following the message.  First, point to
390   * where a socakddr might be if one follows the message.
391   */
392  cp = (void *)(ifm + 1);
393
394#ifdef SUNOS_5
395  /*
396   * XXX This behavior should be narrowed to only the kernel versions
397   * for which the structures returned do not match the headers.
398   *
399   * if_msghdr_t on 64 bit kernels in Solaris 9 and earlier versions
400   * is 12 bytes larger than the 32 bit version.
401   */
402  if (((struct sockaddr *) cp)->sa_family == AF_UNSPEC)
403  	cp = cp + 12;
404#endif
405
406  RTA_ADDR_GET (NULL, RTA_DST, ifm->ifm_addrs, cp);
407  RTA_ADDR_GET (NULL, RTA_GATEWAY, ifm->ifm_addrs, cp);
408  RTA_ATTR_GET (NULL, RTA_NETMASK, ifm->ifm_addrs, cp);
409  RTA_ADDR_GET (NULL, RTA_GENMASK, ifm->ifm_addrs, cp);
410  sdl = (struct sockaddr_dl *)cp;
411  RTA_NAME_GET (ifname, RTA_IFP, ifm->ifm_addrs, cp, ifnlen);
412  RTA_ADDR_GET (NULL, RTA_IFA, ifm->ifm_addrs, cp);
413  RTA_ADDR_GET (NULL, RTA_AUTHOR, ifm->ifm_addrs, cp);
414  RTA_ADDR_GET (NULL, RTA_BRD, ifm->ifm_addrs, cp);
415
416  if (IS_ZEBRA_DEBUG_KERNEL)
417    zlog_debug ("%s: sdl ifname %s", __func__, (ifnlen ? ifname : "(nil)"));
418
419  /*
420   * Look up on ifindex first, because ifindices are the primary handle for
421   * interfaces across the user/kernel boundary, for most systems.  (Some
422   * messages, such as up/down status changes on NetBSD, do not include a
423   * sockaddr_dl).
424   */
425  if ( (ifp = if_lookup_by_index (ifm->ifm_index)) != NULL )
426    {
427      /* we have an ifp, verify that the name matches as some systems,
428       * eg Solaris, have a 1:many association of ifindex:ifname
429       * if they dont match, we dont have the correct ifp and should
430       * set it back to NULL to let next check do lookup by name
431       */
432      if (ifnlen && (strncmp (ifp->name, ifname, IFNAMSIZ) != 0) )
433        {
434          if (IS_ZEBRA_DEBUG_KERNEL)
435            zlog_debug ("%s: ifp name %s doesnt match sdl name %s",
436                        __func__, ifp->name, ifname);
437          ifp = NULL;
438        }
439    }
440
441  /*
442   * If we dont have an ifp, try looking up by name.  Particularly as some
443   * systems (Solaris) have a 1:many mapping of ifindex:ifname - the ifname
444   * is therefore our unique handle to that interface.
445   *
446   * Interfaces specified in the configuration file for which the ifindex
447   * has not been determined will have ifindex == IFINDEX_INTERNAL, and such
448   * interfaces are found by this search, and then their ifindex values can
449   * be filled in.
450   */
451  if ( (ifp == NULL) && ifnlen)
452    ifp = if_lookup_by_name (ifname);
453
454  /*
455   * If ifp still does not exist or has an invalid index (IFINDEX_INTERNAL),
456   * create or fill in an interface.
457   */
458  if ((ifp == NULL) || (ifp->ifindex == IFINDEX_INTERNAL))
459    {
460      /*
461       * To create or fill in an interface, a sockaddr_dl (via
462       * RTA_IFP) is required.
463       */
464      if (!ifnlen)
465	{
466	  zlog_warn ("Interface index %d (new) missing ifname\n",
467		     ifm->ifm_index);
468	  return -1;
469	}
470
471#ifndef RTM_IFANNOUNCE
472      /* Down->Down interface should be ignored here.
473       * See further comment below.
474       */
475      if (!CHECK_FLAG (ifm->ifm_flags, IFF_UP))
476        return 0;
477#endif /* !RTM_IFANNOUNCE */
478
479      if (ifp == NULL)
480        {
481	  /* Interface that zebra was not previously aware of, so create. */
482	  ifp = if_create (ifname, ifnlen);
483	  if (IS_ZEBRA_DEBUG_KERNEL)
484	    zlog_debug ("%s: creating ifp for ifindex %d",
485	                __func__, ifm->ifm_index);
486        }
487
488      if (IS_ZEBRA_DEBUG_KERNEL)
489        zlog_debug ("%s: updated/created ifp, ifname %s, ifindex %d",
490                    __func__, ifp->name, ifp->ifindex);
491      /*
492       * Fill in newly created interface structure, or larval
493       * structure with ifindex IFINDEX_INTERNAL.
494       */
495      ifp->ifindex = ifm->ifm_index;
496
497#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */
498      bsd_linkdetect_translate(ifm);
499#endif /* HAVE_BSD_IFI_LINK_STATE */
500
501      if_flags_update (ifp, ifm->ifm_flags);
502#if defined(__bsdi__)
503      if_kvm_get_mtu (ifp);
504#else
505      if_get_mtu (ifp);
506#endif /* __bsdi__ */
507      if_get_metric (ifp);
508
509      /*
510       * XXX sockaddr_dl contents can be larger than the structure
511       * definition.  There are 2 big families here:
512       *  - BSD has sdl_len + sdl_data[16] + overruns sdl_data
513       *    we MUST use sdl_len here or we'll truncate data.
514       *  - Solaris has no sdl_len, but sdl_data[244]
515       *    presumably, it's not going to run past that, so sizeof()
516       *    is fine here.
517       * a nonzero ifnlen from RTA_NAME_GET() means sdl is valid
518       */
519      if (ifnlen)
520      {
521#ifdef HAVE_STRUCT_SOCKADDR_DL_SDL_LEN
522	memcpy (&ifp->sdl, sdl, sdl->sdl_len);
523#else
524	memcpy (&ifp->sdl, sdl, sizeof (struct sockaddr_dl));
525#endif /* HAVE_STRUCT_SOCKADDR_DL_SDL_LEN */
526      }
527
528      if_add_update (ifp);
529    }
530  else
531    /*
532     * Interface structure exists.  Adjust stored flags from
533     * notification.  If interface has up->down or down->up
534     * transition, call state change routines (to adjust routes,
535     * notify routing daemons, etc.).  (Other flag changes are stored
536     * but apparently do not trigger action.)
537     */
538    {
539      if (ifp->ifindex != ifm->ifm_index)
540        {
541          zlog_warn ("%s: index mismatch, ifname %s, ifp index %d, "
542                     "ifm index %d",
543                     __func__, ifp->name, ifp->ifindex, ifm->ifm_index);
544          return -1;
545        }
546
547#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */
548      bsd_linkdetect_translate(ifm);
549#endif /* HAVE_BSD_IFI_LINK_STATE */
550
551      /* update flags and handle operative->inoperative transition, if any */
552      if_flags_update (ifp, ifm->ifm_flags);
553
554#ifndef RTM_IFANNOUNCE
555      if (!if_is_up (ifp))
556          {
557            /* No RTM_IFANNOUNCE on this platform, so we can never
558             * distinguish between ~IFF_UP and delete. We must presume
559             * it has been deleted.
560             * Eg, Solaris will not notify us of unplumb.
561             *
562             * XXX: Fixme - this should be runtime detected
563             * So that a binary compiled on a system with IFANNOUNCE
564             * will still behave correctly if run on a platform without
565             */
566            if_delete_update (ifp);
567          }
568#endif /* RTM_IFANNOUNCE */
569      if (if_is_up (ifp))
570      {
571#if defined(__bsdi__)
572        if_kvm_get_mtu (ifp);
573#else
574        if_get_mtu (ifp);
575#endif /* __bsdi__ */
576        if_get_metric (ifp);
577      }
578    }
579
580#ifdef HAVE_NET_RT_IFLIST
581  ifp->stats = ifm->ifm_data;
582#endif /* HAVE_NET_RT_IFLIST */
583
584  if (IS_ZEBRA_DEBUG_KERNEL)
585    zlog_debug ("%s: interface %s index %d",
586                __func__, ifp->name, ifp->ifindex);
587
588  return 0;
589}
590
591/* Address read from struct ifa_msghdr. */
592static void
593ifam_read_mesg (struct ifa_msghdr *ifm,
594		union sockunion *addr,
595		union sockunion *mask,
596		union sockunion *brd,
597		char *ifname,
598		short *ifnlen)
599{
600  caddr_t pnt, end;
601  union sockunion dst;
602  union sockunion gateway;
603
604  pnt = (caddr_t)(ifm + 1);
605  end = ((caddr_t)ifm) + ifm->ifam_msglen;
606
607  /* Be sure structure is cleared */
608  memset (mask, 0, sizeof (union sockunion));
609  memset (addr, 0, sizeof (union sockunion));
610  memset (brd, 0, sizeof (union sockunion));
611  memset (&dst, 0, sizeof (union sockunion));
612  memset (&gateway, 0, sizeof (union sockunion));
613
614  /* We fetch each socket variable into sockunion. */
615  RTA_ADDR_GET (&dst, RTA_DST, ifm->ifam_addrs, pnt);
616  RTA_ADDR_GET (&gateway, RTA_GATEWAY, ifm->ifam_addrs, pnt);
617  RTA_ATTR_GET (mask, RTA_NETMASK, ifm->ifam_addrs, pnt);
618  RTA_ADDR_GET (NULL, RTA_GENMASK, ifm->ifam_addrs, pnt);
619  RTA_NAME_GET (ifname, RTA_IFP, ifm->ifam_addrs, pnt, *ifnlen);
620  RTA_ADDR_GET (addr, RTA_IFA, ifm->ifam_addrs, pnt);
621  RTA_ADDR_GET (NULL, RTA_AUTHOR, ifm->ifam_addrs, pnt);
622  RTA_ADDR_GET (brd, RTA_BRD, ifm->ifam_addrs, pnt);
623
624  if (IS_ZEBRA_DEBUG_KERNEL)
625    {
626      switch (sockunion_family(addr))
627        {
628	case AF_INET:
629	  {
630	    char buf[4][INET_ADDRSTRLEN];
631	    zlog_debug ("%s: ifindex %d, ifname %s, ifam_addrs 0x%x, "
632			"ifam_flags 0x%x, addr %s/%d broad %s dst %s "
633			"gateway %s",
634			__func__, ifm->ifam_index,
635			(ifnlen ? ifname : "(nil)"), ifm->ifam_addrs,
636			ifm->ifam_flags,
637			inet_ntop(AF_INET,&addr->sin.sin_addr,
638			          buf[0],sizeof(buf[0])),
639			ip_masklen(mask->sin.sin_addr),
640			inet_ntop(AF_INET,&brd->sin.sin_addr,
641			          buf[1],sizeof(buf[1])),
642			inet_ntop(AF_INET,&dst.sin.sin_addr,
643			          buf[2],sizeof(buf[2])),
644			inet_ntop(AF_INET,&gateway.sin.sin_addr,
645			          buf[3],sizeof(buf[3])));
646	  }
647	  break;
648#ifdef HAVE_IPV6
649	case AF_INET6:
650	  {
651	    char buf[4][INET6_ADDRSTRLEN];
652	    zlog_debug ("%s: ifindex %d, ifname %s, ifam_addrs 0x%x, "
653			"ifam_flags 0x%x, addr %s/%d broad %s dst %s "
654			"gateway %s",
655			__func__, ifm->ifam_index,
656			(ifnlen ? ifname : "(nil)"), ifm->ifam_addrs,
657			ifm->ifam_flags,
658			inet_ntop(AF_INET6,&addr->sin6.sin6_addr,
659			          buf[0],sizeof(buf[0])),
660			ip6_masklen(mask->sin6.sin6_addr),
661			inet_ntop(AF_INET6,&brd->sin6.sin6_addr,
662			          buf[1],sizeof(buf[1])),
663			inet_ntop(AF_INET6,&dst.sin6.sin6_addr,
664			          buf[2],sizeof(buf[2])),
665			inet_ntop(AF_INET6,&gateway.sin6.sin6_addr,
666			          buf[3],sizeof(buf[3])));
667	  }
668	  break;
669#endif /* HAVE_IPV6 */
670        default:
671	  zlog_debug ("%s: ifindex %d, ifname %s, ifam_addrs 0x%x",
672		      __func__, ifm->ifam_index,
673		      (ifnlen ? ifname : "(nil)"), ifm->ifam_addrs);
674	  break;
675        }
676    }
677
678  /* Assert read up end point matches to end point */
679  if (pnt != end)
680    zlog_warn ("ifam_read() doesn't read all socket data");
681}
682
683/* Interface's address information get. */
684int
685ifam_read (struct ifa_msghdr *ifam)
686{
687  struct interface *ifp = NULL;
688  union sockunion addr, mask, brd;
689  char ifname[INTERFACE_NAMSIZ];
690  short ifnlen = 0;
691  char isalias = 0;
692  int flags = 0;
693
694  ifname[0] = ifname[INTERFACE_NAMSIZ - 1] = '\0';
695
696  /* Allocate and read address information. */
697  ifam_read_mesg (ifam, &addr, &mask, &brd, ifname, &ifnlen);
698
699  if ((ifp = if_lookup_by_index(ifam->ifam_index)) == NULL)
700    {
701      zlog_warn ("%s: no interface for ifname %s, index %d",
702                 __func__, ifname, ifam->ifam_index);
703      return -1;
704    }
705
706  if (ifnlen && strncmp (ifp->name, ifname, INTERFACE_NAMSIZ))
707    isalias = 1;
708
709  /* N.B. The info in ifa_msghdr does not tell us whether the RTA_BRD
710     field contains a broadcast address or a peer address, so we are forced to
711     rely upon the interface type. */
712  if (if_is_pointopoint(ifp))
713    SET_FLAG(flags, ZEBRA_IFA_PEER);
714
715#if 0
716  /* it might seem cute to grab the interface metric here, however
717   * we're processing an address update message, and so some systems
718   * (e.g. FBSD) dont bother to fill in ifam_metric. Disabled, but left
719   * in deliberately, as comment.
720   */
721  ifp->metric = ifam->ifam_metric;
722#endif
723
724  /* Add connected address. */
725  switch (sockunion_family (&addr))
726    {
727    case AF_INET:
728      if (ifam->ifam_type == RTM_NEWADDR)
729	connected_add_ipv4 (ifp, flags, &addr.sin.sin_addr,
730			    ip_masklen (mask.sin.sin_addr),
731			    &brd.sin.sin_addr,
732			    (isalias ? ifname : NULL));
733      else
734	connected_delete_ipv4 (ifp, flags, &addr.sin.sin_addr,
735			       ip_masklen (mask.sin.sin_addr),
736			       &brd.sin.sin_addr);
737      break;
738#ifdef HAVE_IPV6
739    case AF_INET6:
740      /* Unset interface index from link-local address when IPv6 stack
741	 is KAME. */
742      if (IN6_IS_ADDR_LINKLOCAL (&addr.sin6.sin6_addr))
743	SET_IN6_LINKLOCAL_IFINDEX (addr.sin6.sin6_addr, 0);
744
745      if (ifam->ifam_type == RTM_NEWADDR)
746	connected_add_ipv6 (ifp, flags, &addr.sin6.sin6_addr,
747			    ip6_masklen (mask.sin6.sin6_addr),
748			    &brd.sin6.sin6_addr,
749			    (isalias ? ifname : NULL));
750      else
751	connected_delete_ipv6 (ifp,
752			       &addr.sin6.sin6_addr,
753			       ip6_masklen (mask.sin6.sin6_addr),
754			       &brd.sin6.sin6_addr);
755      break;
756#endif /* HAVE_IPV6 */
757    default:
758      /* Unsupported family silently ignore... */
759      break;
760    }
761
762  /* Check interface flag for implicit up of the interface. */
763  if_refresh (ifp);
764
765#ifdef SUNOS_5
766  /* In addition to lacking IFANNOUNCE, on SUNOS IFF_UP is strange.
767   * See comments for SUNOS_5 in interface.c::if_flags_mangle.
768   *
769   * Here we take care of case where the real IFF_UP was previously
770   * unset (as kept in struct zebra_if.primary_state) and the mangled
771   * IFF_UP (ie IFF_UP set || listcount(connected) has now transitioned
772   * to unset due to the lost non-primary address having DELADDR'd.
773   *
774   * we must delete the interface, because in between here and next
775   * event for this interface-name the administrator could unplumb
776   * and replumb the interface.
777   */
778  if (!if_is_up (ifp))
779    if_delete_update (ifp);
780#endif /* SUNOS_5 */
781
782  return 0;
783}
784
785/* Interface function for reading kernel routing table information. */
786static int
787rtm_read_mesg (struct rt_msghdr *rtm,
788	       union sockunion *dest,
789	       union sockunion *mask,
790	       union sockunion *gate,
791	       char *ifname,
792	       short *ifnlen)
793{
794  caddr_t pnt, end;
795
796  /* Pnt points out socket data start point. */
797  pnt = (caddr_t)(rtm + 1);
798  end = ((caddr_t)rtm) + rtm->rtm_msglen;
799
800  /* rt_msghdr version check. */
801  if (rtm->rtm_version != RTM_VERSION)
802      zlog (NULL, LOG_WARNING,
803	      "Routing message version different %d should be %d."
804	      "This may cause problem\n", rtm->rtm_version, RTM_VERSION);
805
806  /* Be sure structure is cleared */
807  memset (dest, 0, sizeof (union sockunion));
808  memset (gate, 0, sizeof (union sockunion));
809  memset (mask, 0, sizeof (union sockunion));
810
811  /* We fetch each socket variable into sockunion. */
812  RTA_ADDR_GET (dest, RTA_DST, rtm->rtm_addrs, pnt);
813  RTA_ADDR_GET (gate, RTA_GATEWAY, rtm->rtm_addrs, pnt);
814  RTA_ATTR_GET (mask, RTA_NETMASK, rtm->rtm_addrs, pnt);
815  RTA_ADDR_GET (NULL, RTA_GENMASK, rtm->rtm_addrs, pnt);
816  RTA_NAME_GET (ifname, RTA_IFP, rtm->rtm_addrs, pnt, *ifnlen);
817  RTA_ADDR_GET (NULL, RTA_IFA, rtm->rtm_addrs, pnt);
818  RTA_ADDR_GET (NULL, RTA_AUTHOR, rtm->rtm_addrs, pnt);
819  RTA_ADDR_GET (NULL, RTA_BRD, rtm->rtm_addrs, pnt);
820
821  /* If there is netmask information set it's family same as
822     destination family*/
823  if (rtm->rtm_addrs & RTA_NETMASK)
824    mask->sa.sa_family = dest->sa.sa_family;
825
826  /* Assert read up to the end of pointer. */
827  if (pnt != end)
828      zlog (NULL, LOG_WARNING, "rtm_read() doesn't read all socket data.");
829
830  return rtm->rtm_flags;
831}
832
833void
834rtm_read (struct rt_msghdr *rtm)
835{
836  int flags;
837  u_char zebra_flags;
838  union sockunion dest, mask, gate;
839  char ifname[INTERFACE_NAMSIZ + 1];
840  short ifnlen = 0;
841
842  zebra_flags = 0;
843
844  /* Read destination and netmask and gateway from rtm message
845     structure. */
846  flags = rtm_read_mesg (rtm, &dest, &mask, &gate, ifname, &ifnlen);
847  if (!(flags & RTF_DONE))
848    return;
849  if (IS_ZEBRA_DEBUG_KERNEL)
850    zlog_debug ("%s: got rtm of type %d (%s)", __func__, rtm->rtm_type,
851      lookup (rtm_type_str, rtm->rtm_type));
852
853#ifdef RTF_CLONED	/*bsdi, netbsd 1.6*/
854  if (flags & RTF_CLONED)
855    return;
856#endif
857#ifdef RTF_WASCLONED	/*freebsd*/
858  if (flags & RTF_WASCLONED)
859    return;
860#endif
861
862  if ((rtm->rtm_type == RTM_ADD) && ! (flags & RTF_UP))
863    return;
864
865  /* This is connected route. */
866  if (! (flags & RTF_GATEWAY))
867      return;
868
869  if (flags & RTF_PROTO1)
870    SET_FLAG (zebra_flags, ZEBRA_FLAG_SELFROUTE);
871
872  /* This is persistent route. */
873  if (flags & RTF_STATIC)
874    SET_FLAG (zebra_flags, ZEBRA_FLAG_STATIC);
875
876  /* This is a reject or blackhole route */
877  if (flags & RTF_REJECT)
878    SET_FLAG (zebra_flags, ZEBRA_FLAG_REJECT);
879  if (flags & RTF_BLACKHOLE)
880    SET_FLAG (zebra_flags, ZEBRA_FLAG_BLACKHOLE);
881
882  if (dest.sa.sa_family == AF_INET)
883    {
884      struct prefix_ipv4 p;
885
886      p.family = AF_INET;
887      p.prefix = dest.sin.sin_addr;
888      if (flags & RTF_HOST)
889	p.prefixlen = IPV4_MAX_PREFIXLEN;
890      else
891	p.prefixlen = ip_masklen (mask.sin.sin_addr);
892
893      /* Catch self originated messages and match them against our current RIB.
894       * At the same time, ignore unconfirmed messages, they should be tracked
895       * by rtm_write() and kernel_rtm_ipv4().
896       */
897      if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid)
898      {
899        char buf[INET_ADDRSTRLEN], gate_buf[INET_ADDRSTRLEN];
900        int ret;
901        if (! IS_ZEBRA_DEBUG_RIB)
902          return;
903        ret = rib_lookup_ipv4_route (&p, &gate);
904        inet_ntop (AF_INET, &p.prefix, buf, INET_ADDRSTRLEN);
905        switch (rtm->rtm_type)
906        {
907          case RTM_ADD:
908          case RTM_GET:
909          case RTM_CHANGE:
910            /* The kernel notifies us about a new route in FIB created by us.
911               Do we have a correspondent entry in our RIB? */
912            switch (ret)
913            {
914              case ZEBRA_RIB_NOTFOUND:
915                zlog_debug ("%s: %s %s/%d: desync: RR isn't yet in RIB, while already in FIB",
916                  __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
917                break;
918              case ZEBRA_RIB_FOUND_CONNECTED:
919              case ZEBRA_RIB_FOUND_NOGATE:
920                inet_ntop (AF_INET, &gate.sin.sin_addr, gate_buf, INET_ADDRSTRLEN);
921                zlog_debug ("%s: %s %s/%d: desync: RR is in RIB, but gate differs (ours is %s)",
922                  __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen, gate_buf);
923                break;
924              case ZEBRA_RIB_FOUND_EXACT: /* RIB RR == FIB RR */
925                zlog_debug ("%s: %s %s/%d: done Ok",
926                  __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
927                rib_lookup_and_dump (&p);
928                return;
929                break;
930            }
931            break;
932          case RTM_DELETE:
933            /* The kernel notifies us about a route deleted by us. Do we still
934               have it in the RIB? Do we have anything instead? */
935            switch (ret)
936            {
937              case ZEBRA_RIB_FOUND_EXACT:
938                zlog_debug ("%s: %s %s/%d: desync: RR is still in RIB, while already not in FIB",
939                  __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
940                rib_lookup_and_dump (&p);
941                break;
942              case ZEBRA_RIB_FOUND_CONNECTED:
943              case ZEBRA_RIB_FOUND_NOGATE:
944                zlog_debug ("%s: %s %s/%d: desync: RR is still in RIB, plus gate differs",
945                  __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
946                rib_lookup_and_dump (&p);
947                break;
948              case ZEBRA_RIB_NOTFOUND: /* RIB RR == FIB RR */
949                zlog_debug ("%s: %s %s/%d: done Ok",
950                  __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
951                rib_lookup_and_dump (&p);
952                return;
953                break;
954            }
955            break;
956          default:
957            zlog_debug ("%s: %s/%d: warning: loopback RTM of type %s received",
958              __func__, buf, p.prefixlen, lookup (rtm_type_str, rtm->rtm_type));
959        }
960        return;
961      }
962
963      /* Change, delete the old prefix, we have no further information
964       * to specify the route really
965       */
966      if (rtm->rtm_type == RTM_CHANGE)
967        rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p,
968                         NULL, 0, 0, SAFI_UNICAST);
969
970      if (rtm->rtm_type == RTM_GET
971          || rtm->rtm_type == RTM_ADD
972          || rtm->rtm_type == RTM_CHANGE)
973	rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags,
974		      &p, &gate.sin.sin_addr, NULL, 0, 0, 0, 0, SAFI_UNICAST);
975      else
976	rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags,
977		      &p, &gate.sin.sin_addr, 0, 0, SAFI_UNICAST);
978    }
979#ifdef HAVE_IPV6
980  if (dest.sa.sa_family == AF_INET6)
981    {
982      /* One day we might have a debug section here like one in the
983       * IPv4 case above. Just ignore own messages at the moment.
984       */
985      if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid)
986        return;
987      struct prefix_ipv6 p;
988      unsigned int ifindex = 0;
989
990      p.family = AF_INET6;
991      p.prefix = dest.sin6.sin6_addr;
992      if (flags & RTF_HOST)
993	p.prefixlen = IPV6_MAX_PREFIXLEN;
994      else
995	p.prefixlen = ip6_masklen (mask.sin6.sin6_addr);
996
997#ifdef KAME
998      if (IN6_IS_ADDR_LINKLOCAL (&gate.sin6.sin6_addr))
999	{
1000	  ifindex = IN6_LINKLOCAL_IFINDEX (gate.sin6.sin6_addr);
1001	  SET_IN6_LINKLOCAL_IFINDEX (gate.sin6.sin6_addr, 0);
1002	}
1003#endif /* KAME */
1004
1005      /* CHANGE: delete the old prefix, we have no further information
1006       * to specify the route really
1007       */
1008      if (rtm->rtm_type == RTM_CHANGE)
1009        rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p,
1010                         NULL, 0, 0, SAFI_UNICAST);
1011
1012      if (rtm->rtm_type == RTM_GET
1013          || rtm->rtm_type == RTM_ADD
1014          || rtm->rtm_type == RTM_CHANGE)
1015	rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags,
1016		      &p, &gate.sin6.sin6_addr, ifindex, 0, 0, 0, SAFI_UNICAST);
1017      else
1018	rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags,
1019			 &p, &gate.sin6.sin6_addr, ifindex, 0, SAFI_UNICAST);
1020    }
1021#endif /* HAVE_IPV6 */
1022}
1023
1024/* Interface function for the kernel routing table updates.  Support
1025 * for RTM_CHANGE will be needed.
1026 * Exported only for rt_socket.c
1027 */
1028int
1029rtm_write (int message,
1030	   union sockunion *dest,
1031	   union sockunion *mask,
1032	   union sockunion *gate,
1033	   unsigned int index,
1034	   int zebra_flags,
1035	   int metric)
1036{
1037  int ret;
1038  caddr_t pnt;
1039  struct interface *ifp;
1040
1041  /* Sequencial number of routing message. */
1042  static int msg_seq = 0;
1043
1044  /* Struct of rt_msghdr and buffer for storing socket's data. */
1045  struct
1046  {
1047    struct rt_msghdr rtm;
1048    char buf[512];
1049  } msg;
1050
1051  if (routing_sock < 0)
1052    return ZEBRA_ERR_EPERM;
1053
1054  /* Clear and set rt_msghdr values */
1055  memset (&msg, 0, sizeof (struct rt_msghdr));
1056  msg.rtm.rtm_version = RTM_VERSION;
1057  msg.rtm.rtm_type = message;
1058  msg.rtm.rtm_seq = msg_seq++;
1059  msg.rtm.rtm_addrs = RTA_DST;
1060  msg.rtm.rtm_addrs |= RTA_GATEWAY;
1061  msg.rtm.rtm_flags = RTF_UP;
1062  msg.rtm.rtm_index = index;
1063
1064  if (metric != 0)
1065    {
1066      msg.rtm.rtm_rmx.rmx_hopcount = metric;
1067      msg.rtm.rtm_inits |= RTV_HOPCOUNT;
1068    }
1069
1070  ifp = if_lookup_by_index (index);
1071
1072  if (gate && message == RTM_ADD)
1073    msg.rtm.rtm_flags |= RTF_GATEWAY;
1074
1075  /* When RTF_CLONING is unavailable on BSD, should we set some
1076   * other flag instead?
1077   */
1078#ifdef RTF_CLONING
1079  if (! gate && message == RTM_ADD && ifp &&
1080      (ifp->flags & IFF_POINTOPOINT) == 0)
1081    msg.rtm.rtm_flags |= RTF_CLONING;
1082#endif /* RTF_CLONING */
1083
1084  /* If no protocol specific gateway is specified, use link
1085     address for gateway. */
1086  if (! gate)
1087    {
1088      if (!ifp)
1089        {
1090          char dest_buf[INET_ADDRSTRLEN] = "NULL", mask_buf[INET_ADDRSTRLEN] = "255.255.255.255";
1091          if (dest)
1092            inet_ntop (AF_INET, &dest->sin.sin_addr, dest_buf, INET_ADDRSTRLEN);
1093          if (mask)
1094            inet_ntop (AF_INET, &mask->sin.sin_addr, mask_buf, INET_ADDRSTRLEN);
1095          zlog_warn ("%s: %s/%s: gate == NULL and no gateway found for ifindex %d",
1096            __func__, dest_buf, mask_buf, index);
1097          return -1;
1098        }
1099      gate = (union sockunion *) & ifp->sdl;
1100    }
1101
1102  if (mask)
1103    msg.rtm.rtm_addrs |= RTA_NETMASK;
1104  else if (message == RTM_ADD)
1105    msg.rtm.rtm_flags |= RTF_HOST;
1106
1107  /* Tagging route with flags */
1108  msg.rtm.rtm_flags |= (RTF_PROTO1);
1109
1110  /* Additional flags. */
1111  if (zebra_flags & ZEBRA_FLAG_BLACKHOLE)
1112    msg.rtm.rtm_flags |= RTF_BLACKHOLE;
1113  if (zebra_flags & ZEBRA_FLAG_REJECT)
1114    msg.rtm.rtm_flags |= RTF_REJECT;
1115
1116
1117#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1118#define SOCKADDRSET(X,R) \
1119  if (msg.rtm.rtm_addrs & (R)) \
1120    { \
1121      int len = ROUNDUP ((X)->sa.sa_len); \
1122      memcpy (pnt, (caddr_t)(X), len); \
1123      pnt += len; \
1124    }
1125#else
1126#define SOCKADDRSET(X,R) \
1127  if (msg.rtm.rtm_addrs & (R)) \
1128    { \
1129      int len = SAROUNDUP (X); \
1130      memcpy (pnt, (caddr_t)(X), len); \
1131      pnt += len; \
1132    }
1133#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1134
1135  pnt = (caddr_t) msg.buf;
1136
1137  /* Write each socket data into rtm message buffer */
1138  SOCKADDRSET (dest, RTA_DST);
1139  SOCKADDRSET (gate, RTA_GATEWAY);
1140  SOCKADDRSET (mask, RTA_NETMASK);
1141
1142  msg.rtm.rtm_msglen = pnt - (caddr_t) &msg;
1143
1144  ret = write (routing_sock, &msg, msg.rtm.rtm_msglen);
1145
1146  if (ret != msg.rtm.rtm_msglen)
1147    {
1148      if (errno == EEXIST)
1149	return ZEBRA_ERR_RTEXIST;
1150      if (errno == ENETUNREACH)
1151	return ZEBRA_ERR_RTUNREACH;
1152      if (errno == ESRCH)
1153	return ZEBRA_ERR_RTNOEXIST;
1154
1155      zlog_warn ("%s: write : %s (%d)", __func__, safe_strerror (errno), errno);
1156      return ZEBRA_ERR_KERNEL;
1157    }
1158  return ZEBRA_ERR_NOERROR;
1159}
1160
1161
1162#include "thread.h"
1163#include "zebra/zserv.h"
1164
1165/* For debug purpose. */
1166static void
1167rtmsg_debug (struct rt_msghdr *rtm)
1168{
1169  zlog_debug ("Kernel: Len: %d Type: %s", rtm->rtm_msglen, lookup (rtm_type_str, rtm->rtm_type));
1170  rtm_flag_dump (rtm->rtm_flags);
1171  zlog_debug ("Kernel: message seq %d", rtm->rtm_seq);
1172  zlog_debug ("Kernel: pid %d, rtm_addrs 0x%x", rtm->rtm_pid, rtm->rtm_addrs);
1173}
1174
/* Fallback when the system headers do not define RTAX_MAX: use
 * RTA_NUMBITS if that is available, otherwise 8.
 * This is pretty gross, better suggestions welcome -- mhandler
 */
#if !defined (RTAX_MAX)
#  if defined (RTA_NUMBITS)
#    define RTAX_MAX	RTA_NUMBITS
#  else
#    define RTAX_MAX	8
#  endif /* RTA_NUMBITS */
#endif /* RTAX_MAX */
1183
1184/* Kernel routing table and interface updates via routing socket. */
1185static int
1186kernel_read (struct thread *thread)
1187{
1188  int sock;
1189  int nbytes;
1190  struct rt_msghdr *rtm;
1191
1192  /*
1193   * This must be big enough for any message the kernel might send.
1194   * Rather than determining how many sockaddrs of what size might be
1195   * in each particular message, just use RTAX_MAX of sockaddr_storage
1196   * for each.  Note that the sockaddrs must be after each message
1197   * definition, or rather after whichever happens to be the largest,
1198   * since the buffer needs to be big enough for a message and the
1199   * sockaddrs together.
1200   */
1201  union
1202  {
1203    /* Routing information. */
1204    struct
1205    {
1206      struct rt_msghdr rtm;
1207      struct sockaddr_storage addr[RTAX_MAX];
1208    } r;
1209
1210    /* Interface information. */
1211    struct
1212    {
1213      struct if_msghdr ifm;
1214      struct sockaddr_storage addr[RTAX_MAX];
1215    } im;
1216
1217    /* Interface address information. */
1218    struct
1219    {
1220      struct ifa_msghdr ifa;
1221      struct sockaddr_storage addr[RTAX_MAX];
1222    } ia;
1223
1224#ifdef RTM_IFANNOUNCE
1225    /* Interface arrival/departure */
1226    struct
1227    {
1228      struct if_announcemsghdr ifan;
1229      struct sockaddr_storage addr[RTAX_MAX];
1230    } ian;
1231#endif /* RTM_IFANNOUNCE */
1232
1233  } buf;
1234
1235  /* Fetch routing socket. */
1236  sock = THREAD_FD (thread);
1237
1238  nbytes= read (sock, &buf, sizeof buf);
1239
1240  if (nbytes <= 0)
1241    {
1242      if (nbytes < 0 && errno != EWOULDBLOCK && errno != EAGAIN)
1243	zlog_warn ("routing socket error: %s", safe_strerror (errno));
1244      return 0;
1245    }
1246
1247  thread_add_read (zebrad.master, kernel_read, NULL, sock);
1248
1249  if (IS_ZEBRA_DEBUG_KERNEL)
1250    rtmsg_debug (&buf.r.rtm);
1251
1252  rtm = &buf.r.rtm;
1253
1254  /*
1255   * Ensure that we didn't drop any data, so that processing routines
1256   * can assume they have the whole message.
1257   */
1258  if (rtm->rtm_msglen != nbytes)
1259    {
1260      zlog_warn ("kernel_read: rtm->rtm_msglen %d, nbytes %d, type %d\n",
1261		 rtm->rtm_msglen, nbytes, rtm->rtm_type);
1262      return -1;
1263    }
1264
1265  switch (rtm->rtm_type)
1266    {
1267    case RTM_ADD:
1268    case RTM_DELETE:
1269    case RTM_CHANGE:
1270      rtm_read (rtm);
1271      break;
1272    case RTM_IFINFO:
1273      ifm_read (&buf.im.ifm);
1274      break;
1275    case RTM_NEWADDR:
1276    case RTM_DELADDR:
1277      ifam_read (&buf.ia.ifa);
1278      break;
1279#ifdef RTM_IFANNOUNCE
1280    case RTM_IFANNOUNCE:
1281      ifan_read (&buf.ian.ifan);
1282      break;
1283#endif /* RTM_IFANNOUNCE */
1284    default:
1285      if (IS_ZEBRA_DEBUG_KERNEL)
1286        zlog_debug("Unprocessed RTM_type: %d", rtm->rtm_type);
1287      break;
1288    }
1289  return 0;
1290}
1291
1292/* Make routing socket. */
1293static void
1294routing_socket (void)
1295{
1296  if ( zserv_privs.change (ZPRIVS_RAISE) )
1297    zlog_err ("routing_socket: Can't raise privileges");
1298
1299  routing_sock = socket (AF_ROUTE, SOCK_RAW, 0);
1300
1301  if (routing_sock < 0)
1302    {
1303      if ( zserv_privs.change (ZPRIVS_LOWER) )
1304        zlog_err ("routing_socket: Can't lower privileges");
1305      zlog_warn ("Can't init kernel routing socket");
1306      return;
1307    }
1308
1309  /* XXX: Socket should be NONBLOCK, however as we currently
1310   * discard failed writes, this will lead to inconsistencies.
1311   * For now, socket must be blocking.
1312   */
1313  /*if (fcntl (routing_sock, F_SETFL, O_NONBLOCK) < 0)
1314    zlog_warn ("Can't set O_NONBLOCK to routing socket");*/
1315
1316  if ( zserv_privs.change (ZPRIVS_LOWER) )
1317    zlog_err ("routing_socket: Can't lower privileges");
1318
1319  /* kernel_read needs rewrite. */
1320  thread_add_read (zebrad.master, kernel_read, NULL, routing_sock);
1321}
1322
/* Exported interface function.  Initialisation of kernel communication
   consists solely of a call to routing_socket (). */
void
kernel_init (void)
{
  routing_socket ();
}
1330