Layer2 层的数据结构
By:姚启桂  Date:2012-05-31

1, Layer2模块的初始化和退出(文件 l2_forwarder.c)

1.1 l2_module_init()函数的加载调用过程

static int __init
l2_module_init (void)
{
  BDEBUG(\"IPI Layer2 Forwarder init\\n\");
  hal_init ();
#ifdef HAVE_L2
  br_init ();
#endif /* HAVE_L2 */
#ifdef HAVE_AUTHD
  eapol_init ();
#endif /* HAVE_AUTHD */
#ifdef HAVE_LACPD
  lacp_init();
#endif /* HAVE_LACPD */
#ifdef HAVE_LLDP
  lldp_init ();
#endif /* HAVE_LLDP */
#ifdef HAVE_ELMID
  elmi_init ();
#endif /* HAVE_ELMID */
  return 0;
}

int hal_init (void)
{
  BWARN (\"NET4: 7/19/2004 Linux HAL 1.0 for Net4.0\\n\");
  sock_register (&hal_family_ops);
  return 0;
}

static struct net_proto_family hal_family_ops = {
  .family= PF_HAL,
  .create= hal_create,
  .owner= THIS_MODULE,
};

int hal_create (struct net *net, struct socket *sock, int protocol)
{
  struct sock *sk;
  BDEBUG (\"protocol %d socket addr %p\\n\
  if (!capable (CAP_NET_RAW))
    return -EPERM;
  if (sock->type != SOCK_RAW)
    return -ESOCKTNOSUPPORT;
  sock->state = SS_UNCONNECTED;
  sk = sk_alloc (current->nsproxy->net_ns, PF_HAL, GFP_KERNEL, &_proto);
  if (sk == NULL)
  {
    return -ENOBUFS;
  }
  BDEBUG (\"sock %p\\n\
  sock->ops = &hal_ops;
  sock_init_data (sock,sk);
  sk->sk_family = PF_HAL;
  sk->sk_protocol = protocol;
  sk->sk_destruct = hal_sock_destruct;
  if (atomic_read (&hal_socks_nr) == 0)
  {
    l2_mod_inc_use_count ();
  }
  atomic_inc (&hal_socks_nr);
  BR_WRITE_LOCK_BH (&hal_sklist_lock);
  sk_add_node (sk, &hal_sklist);
  BR_WRITE_UNLOCK_BH (&hal_sklist_lock);
  return 0;
}

int br_init (void)
{
  BDEBUG(\"IPI Ethernet Bridge\\n\");
  stp_init(); //spanning tree Protocol 的初始化
  garp_init();//通用属性注册协议的初始化
  igmp_snoop_init();//互联网组管理协议窥探的初始化
#ifdef HAVE_EFM
  efm_init ();
#endif /* HAVE_EFM */
#ifdef HAVE_CFM
  cfm_init ();
#endif /* HAVE_CFM */
#ifdef HAVE_ELMID
  elmi_init ();
#endif /* HAVE_ELMID */
  register_ipi_handle_frame_hook(br_handle_frame);
  register_netdevice_notifier (&br_device_notifier);//注册一个网络通告程序块
  return 0;
}

struct ipi_bridge {
  struct ipi_bridge *next;
  struct ipi_bridge **pprev;

得到的端口信息保存在下面的结构体中,注意函数调用:

/* This function handles frames
passed in by the OS. Learning and input port state filtering are performed here. */intbr_handle_frame ( struct sk_buff *skb ){ int learn, forward; struct sk_buff *nskb; unsigned short proto; struct ipi_bridge *br; struct vlan_info_entry *vinfo = NULL; struct vlan_info_entry **vlan_info_table; unsigned short instance = BR_INSTANCE_COMMON; unsigned char *dest_addr = skb->mac_header; unsigned char *src_addr = &skb_src_mac_addr(skb); struct ipi_bridge_port *port = (struct ipi_bridge_port *)(skb->dev->ipi_fwd_port); int is_local = 0; unsigned char edge; /* vid will be used ONLY for vlan-aware bridges and for transparent bridges the value will always be VLAN_NULL_VID */ vid_t cvid = VLAN_NULL_VID; vid_t svid = VLAN_NULL_FID; vid_t inner_tag = VLAN_NULL_VID; vid_t outer_tag = VLAN_NULL_FID; /* on indicates bridge is up and forwarding */ unsigned char flags; /* Frame statistics */ struct net_device_stats statistics; /* Dynamic forwarding database (fdb) lock */ rwlock_t hash_lock;…} /* Bridge name */ char name[IFNAMSIZ + 1]; int leela; /* Bridge lock */ rwlock_t lock; /* Ports that are part of this bridge */ struct ipi_bridge_port *port_list; /* Index for port ifindex */ int port_index; char port_id[BR_MAX_PORTS]; /* Stores whether the bridge is MSTP, RSTP or an STP bridge */ enum bridge_type type; /* No input port - discard frame */ if ( port == NULL ) { kfree_skb ( skb ); return -1; } BDEBUG ( \"Frame recvd on ifindex %d\\n\ br = port->br; if (br == NULL) { kfree_skb (skb); return -1; } BR_READ_LOCK ( &br->lock ); if ( ( skb->dev->ipi_fwd_port == NULL ) || /* Port may go bye-bye during lock */ ( !( br->flags & IPIBR_UP ) ) || /* Interface not up, discard frame */ ( skb_src_mac_addr(skb) & 1 ) ) /* Src address is multicast? 
*/
  {
    BDEBUG ( \"interface not up or bridge is not up or src multicast\\n\" );
    BR_READ_UNLOCK ( &br->lock );
    kfree_skb ( skb );
    return -1;
  }
  if (port->port_type == PRO_NET_PORT || port->port_type == CUST_NET_PORT)
  {
    proto = ETH_P_8021Q_STAG;
    vlan_info_table = br->svlan_info_table;
  }
  else
  {
    proto = ETH_P_8021Q_CTAG;
    vlan_info_table = br->vlan_info_table;
  }
  if (skb_shared (skb) || skb_cloned (skb))
  {
    nskb = skb_copy (skb, GFP_ATOMIC);
    if (nskb == NULL)
    {
      kfree_skb (skb);
      BR_READ_UNLOCK ( &br->lock );
      return -1;
    }
    kfree_skb (skb);
    skb = nskb;
  }
  …….
}

struct vlan_info_entry {
  vid_t vid;
  vlan_state_t vlan_state;
  instance_id_t instance;
  unsigned int mtu_val;
  struct ipi_bridge *br;
  struct net_device *dev;
  struct net_device_stats net_stats;
  /* private VLAN information*/
  bool_t pvlan_configured;
  pvlan_type_t pvlan_type;
  struct br_pvlan_info pvlan_info;
  /* Port whose hw addr we are using for the VI */
  struct ipi_bridge_port *vi_addr_port;
};

struct ipi_bridge_port{
  struct ipi_bridge_port *next;
  struct ipi_bridge_port *multicast_next;
  struct ipi_bridge *br;
  struct net_device *dev;
  /* Port number associated with the port */
  int port_no;
  /* Port ID assigned from 0 - NPF_MAX_PORTS */
  int port_id;
  /* Port state */
  char state[BR_MAX_INSTANCES];
  …….
}

1.2 l2_module_exit()函数的加载调用过程

l2_module_exit 与上面的 l2_module_init 类似,具体请查看相关代码:

static void __exit l2_module_exit (void)
{
  BDEBUG(\"IPI Layer2 Forwarder exit\\n\");
  hal_exit ();
#ifdef HAVE_L2
  br_exit ();
#endif /* HAVE_L2 */
#ifdef HAVE_AUTHD
  eapol_exit ();
#endif /* HAVE_AUTHD */
#ifdef HAVE_LACPD
  lacp_exit();
#endif /* HAVE_LACPD */
#ifdef HAVE_LLDP
  lldp_exit ();
#endif /* HAVE_LLDP */
#ifdef HAVE_ELMID
  elmi_exit ();
#endif /* HAVE_ELMID */
}

1.3 layer2_module模块主要用到的socket

Layer2模块通信主要用到的socket:

/*
 * Create a packet of type SOCK_HAL.
*/int hal_create (struct net *net, struct socket *sock, int protocol){ struct sock *sk; BDEBUG (\"protocol %d socket addr %p\\n\ if (!capable (CAP_NET_RAW)) return -EPERM; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sock->state = SS_UNCONNECTED; sk = sk_alloc (current->nsproxy->net_ns, PF_HAL, GFP_KERNEL, &_proto); if (sk == NULL) { return -ENOBUFS; } BDEBUG (\"sock %p\\n\ sock->ops = &hal_ops; sock_init_data (sock,sk); sk->sk_family = PF_HAL; sk->sk_protocol = protocol; sk->sk_destruct = hal_sock_destruct; if (atomic_read (&hal_socks_nr) == 0) { l2_mod_inc_use_count (); } atomic_inc (&hal_socks_nr); BR_WRITE_LOCK_BH (&hal_sklist_lock); sk_add_node (sk, &hal_sklist); BR_WRITE_UNLOCK_BH (&hal_sklist_lock); return 0;}/* * Close a HAL socket. This is fairly simple. We immediately go * to 'closed' state and remove our protocol entry in the device list. */static int hal_release (struct socket *sock){ struct sock *sk = sock->sk; struct sock **skp; BDEBUG (\"sock %p\\n\ if (!sk) return 0; BR_WRITE_LOCK_BH (&hal_sklist_lock); sk_del_node_init (sk); BR_WRITE_UNLOCK_BH (&hal_sklist_lock); br_cleanup_bridges(sk); //释放,删除桥设备的一些信息 参考具体的函数调用 /* * Now the socket is dead. No more input will appear. 
*/
  sock_orphan (sk);
  sock->sk = NULL;
  /* Purge queues */
  skb_queue_purge (&sk->sk_receive_queue);
  if (atomic_dec_and_test (&hal_socks_nr))
  {
    l2_mod_dec_use_count ();
  }
  sock_put (sk);
  return 0;
}

2, 主要函数调用的结构图

- 主要用到结构体:Br_type.h
- 主要用到的句柄,学习端口的状态:Br_input.c
- br_handle_frame()用来转发数据帧:Br_forward.c
- layer2模块函数:L2_forwarder.c
- br.c:调用初始化函数Br_init()
- 获得操作系统网口状态的函数:br_notify.c
- 主要用到的函数调用句柄:Br_ioctl.c
- Ethernet bridge userspace interface,主要用到函数的集合:Br_api.c
- 老化时间调用结构:Br_time.c
- 老化时间句柄:Br_time_handler.c
- 回调函数:Br_fdb.c
- 设置vlan函数:Br_vlan_api.c
- 设置当前vlan函数,return调用:Br_pro_vlan.c
- 创建一个vlan devices:Br_vlan_dev.c
- Avl tree vlan:Br_avl_vlan.c

以上是主要用到的函数调用,具体如何应用,请参考源代码。

3, Layer2和hal层的消息通道(文件 l2_forwarder.c)

int hal_ioctl (struct socket *sock, unsigned int cmd, unsigned long arg)
{
  int err = 0;
  unsigned long ioctl_cmd;
  BDEBUG (\"sock %p cmd %d arg 0x%lx\\n\
  if (cmd != SIOCPROTOPRIVATE)
  {
    /* BDEBUG(\"dev_ioctl: sock %p cmd %d\\n\
       return dev_ioctl (current->nsproxy->net_ns,cmd,(void *)arg); */
    return -EINVAL;
  }
  if (!capable (CAP_NET_ADMIN))
  {
    BDEBUG(\"sock %p not capable of ioctl\\n\
    return -EPERM;
  }
  /* Command is arg 0 */
  if (copy_from_user ((&ioctl_cmd), (void *)arg, (1 * sizeof (unsigned long))))
  {
    BDEBUG (\"Unable to get command\\n\");
    return -EFAULT;
  }
  down (&hal_ioctl_mutex);
  switch (ioctl_cmd)
  {
#ifdef HAVE_LACPD
    case IPILACP_GET_VERSION:
    case IPILACP_ADD_AGG:
    case IPILACP_DEL_AGG:
    case IPILACP_AGG_LINK:
    case IPILACP_DEAGG_LINK:
    case IPILACP_SET_MACADDR:
      err = lacp_ioctl (sock, cmd, arg); //主要的函数调用接口
      break;
#endif /* HAVE_LACPD */
#ifdef HAVE_L2 //二层信息和配置,主要通过桥来实现
    case IPIBR_GET_VERSION:
    case IPIBR_ADD_IF: //节点增加
    case IPIBR_DEL_IF: //节点删除
    case IPIBR_GET_BRIDGE_INFO: //获得桥的信息
    case IPIBR_GET_PORT_LIST: //端口的状态,增加,删除
    case IPIBR_SET_PORT_STATE:
    case IPIBR_GET_PORT_STATE:
    case IPIBR_SET_PORT_FWDER_FLAGS:
    case IPIBR_FLUSH_FDB_BY_PORT:
    case IPIBR_SET_AGEING_TIME: //老化时间的设置,关闭,
    case IPIBR_DISABLE_AGEING:
    case IPIBR_SET_DYNAMIC_AGEING_INTERVAL:
    case IPIBR_GET_PORT_INFO:
    case IPIBR_SET_BRIDGE_LEARNING:
    case IPIBR_GET_DYNFDB_ENTRIES:
case IPIBR_ADD_STATFDB_ENTRY: case IPIBR_DEL_STATFDB_ENTRY: case IPIBR_ADD_DYNAMIC_FDB_ENTRY: case IPIBR_DEL_DYNAMIC_FDB_ENTRY: case IPIBR_GET_STATFDB_ENTRIES: case IPIBR_GET_BRIDGES: //桥的一些设置 case IPIBR_ADD_BRIDGE: case IPIBR_DEL_BRIDGE: case IPIBR_GET_DEVADDR: case IPIBR_ADD_VLAN_TO_INST: //vlan 配置 case IPI_VLAN_ADD: case IPI_VLAN_DEL: case IPI_VLAN_DISABLE: case IPI_VLAN_ENABLE: case IPI_VLAN_SET_PORT_TYPE: case IPI_VLAN_SET_DEFAULT_PVID: case IPI_VLAN_SET_NATIVE_VID: case IPI_VLAN_SET_MTU: case IPI_VLAN_ADD_VID_TO_PORT: case IPI_VLAN_DEL_VID_FROM_PORT: case IPIBR_ENABLE_IGMP_SNOOPING: case IPIBR_DISABLE_IGMP_SNOOPING: case IPIBR_GET_UNICAST_ENTRIES: case IPIBR_GET_MULTICAST_ENTRIES: case IPIBR_CLEAR_FDB_BY_MAC: case IPIBR_GARP_SET_BRIDGE_TYPE: case IPIBR_ADD_GMRP_SERVICE_REQ: case IPIBR_SET_EXT_FILTER: case IPIBR_SET_PVLAN_TYPE: case IPIBR_SET_PVLAN_ASSOCIATE: case IPIBR_SET_PVLAN_PORT_MODE: case IPIBR_SET_PVLAN_HOST_ASSOCIATION: case IPIBR_ADD_CVLAN_REG_ENTRY: case IPIBR_DEL_CVLAN_REG_ENTRY: case IPIBR_ADD_VLAN_TRANS_ENTRY: case IPIBR_DEL_VLAN_TRANS_ENTRY: case IPIBR_SET_PROTO_PROCESS: case IPI_VLAN_ADD_PRO_EDGE_PORT: case IPI_VLAN_DEL_PRO_EDGE_PORT: case IPI_VLAN_SET_PRO_EDGE_DEFAULT_PVID: case IPI_VLAN_SET_PRO_EDGE_UNTAGGED_VID: case IPIBR_CHANGE_VLAN_TYPE: case IPIBR_GET_IFINDEX_BY_MAC_VID: err = br_ioctl_bridge (sock, arg); //主要调用的ioctl break; #endif /* HAVE_L2 */ #ifdef HAVE_AUTHD case IPIEAPOL_GET_VERSION: case IPIEAPOL_ADD_PORT: case IPIEAPOL_DEL_PORT: case IPIEAPOL_SET_PORT_STATE: case IPIEAPOL_SET_PORT_MACAUTH_STATE: err = eapol_ioctl (sock, cmd, arg); break; #endif /* HAVE_AUTHD */ #ifdef HAVE_EFM case IPIEFM_GET_VERSION: case IPIEFM_ADD_PORT: case IPIEFM_DEL_PORT: case IPIEFM_SET_PORT_STATE: err = efm_ioctl (sock, cmd, arg); break; #endif /* HAVE_EFM */ default: { BDEBUG(\"Unknown command - cmd %ld\\n\ ioctl_cmd); up (&hal_ioctl_mutex); err = -EINVAL; } break; } up (&hal_ioctl_mutex); return err; } 4, 二层调用函数br_ioctl_bridge实现(函数br_ioctl.c) const char 
* const cmdString [] = { \"Invalid command\",/* 0*/ \"IPIBR_GET_VERSION\",/* 1*/ \"IPIBR_GET_BRIDGES\",/* 2*/ \"IPIBR_ADD_BRIDGE\",/* 3*/ \"IPIBR_DEL_BRIDGE\",/* 4*/ \"IPIBR_ADD_IF\",/* 5*/ \"IPIBR_DEL_IF\",/* 6*/ \"IPIBR_GET_BRIDGE_INFO\",/* 7*/ \"IPIBR_GET_PORT_LIST\",/* 8*/ \"IPIBR_SET_AGEING_TIME\",/* 9*/ \"IPIBR_SET_DYNAMIC_AGEING_INTERVAL\",/* 10*/ \"IPIBR_GET_PORT_INFO\",/* 11*/ \"IPIBR_SET_BRIDGE_LEARNING\",/* 12*/ \"IPIBR_GET_DYNFDB_ENTRIES\",/* 13*/ \"IPIBR_GET_STATFDB_ENTRIES\",/* 14*/ \"IPIBR_ADD_STATFDB_ENTRY\",/* 15*/ \"IPIBR_DEL_STATFDB_ENTRY\",/* 16*/ \"IPIBR_GET_DEVADDR\",/* 17*/ \"IPIBR_GET_PORT_STATE\",/* 18*/ \"IPIBR_SET_PORT_STATE\",/* 19*/ \"IPIBR_SET_PORT_FWDER_FLAGS\",/* 20*/ \"IPI_VLAN_ADD\",/* 21*/ \"IPI_VLAN_DEL\",/* 22*/ \"IPI_VLAN_SET_PORT_TYPE\",/* 23*/ \"IPI_VLAN_SET_DEFAULT_PVID\",/* 24*/ \"IPI_VLAN_ADD_VID_TO_PORT\",/* 25*/ \"IPI_VLAN_DEL_VID_FROM_PORT\",/* 26*/ \"IPIBR_FLUSH_FDB_BY_PORT\",/* 27 */ \"IPIBR_ADD_DYNAMIC_FDB_ENTRY\",/* 28 */ \"IPIBR_DEL_DYNAMIC_FDB_ENTRY\",/* 29*/ \"IPIBR_ADD_VLAN_TO_INST\",/* 30 */ \"IPIBR_ENABLE_IGMP_SNOOPING\",/* 31 */ \"IPIBR_DISABLE_IGMP_SNOOPING\",/* 32 */ \"IPI_VLAN_SET_NATIVE_VID\",/* 33 */ \"IPI_VLAN_SET_MTU\",/* 34 */ \"IPIBR_GET_UNICAST_ENTRIES\",/* 35 */ \"IPIBR_GET_MULTICAST_ENTRIES\",/* 36 */ \"IPIBR_CLEAR_FDB_BY_MAC\", /* 37 */ \"IPIBR_GARP_SET_BRIDGE_TYPE\", /* 38 */ \"IPIBR_ADD_GMRP_SERVICE_REQ\", /* 39 */ \"IPIBR_SET_EXT_FILTER\", /* 40 */ \"IPIBR_SET_PVLAN_TYPE\", /* 41 */ \"IPIBR_SET_PVLAN_ASSOCIATE\", /* 42 */ \"IPIBR_SET_PVLAN_PORT_MODE\", /* 43 */ \"IPIBR_SET_PVLAN_HOST_ASSOCIATION\", /* 44 */ \"IPIBR_ADD_CVLAN_REG_ENTRY\", /* 45 */ \"IPIBR_DEL_CVLAN_REG_ENTRY\", /* 46 */ \"IPIBR_ADD_VLAN_TRANS_ENTRY\", /* 47 */ \"IPIBR_DEL_VLAN_TRANS_ENTRY\", /* 48 */ \"IPIBR_SET_PROTO_PROCESS\", /* 49 */ \"IPIBR_CHANGE_VLAN_TYPE\", /* 50 */ \"IPIBR_GET_IFINDEX_BY_MAC_VID\", /* 55 */ \"IPIBR_DISABLE_AGEING\",/* 56 */ \"IPI_VLAN_DISABLE\",/* 57 */ \"IPI_VLAN_ENABLE\",/* 58 */ }; /* This function 
handles ioctls which have a bridge argument. */int br_ioctl_bridge (struct sock * sk, unsigned long arg){ unsigned long cmd; GET_CMD(); BDEBUG(\"br_ioctl_bridge:Executing cmd %ld (%s)\\n\ (cmd > 0 && cmd <= IPIBR_MAX_CMD) ? cmdString[cmd] : \"Unknown command\"); switch (cmd) { case IPIBR_GET_VERSION: { int version = ZEBOS_LAYER2_VERSION ; unsigned long arglist[2]; GET_ARGS(2); if (copy_to_user ((void *)arglist[1], &version, sizeof(int))) { return -EFAULT; } return 0; } case IPIBR_ADD_IF: case IPIBR_DEL_IF: { /* arglist contains (cmd, bridge_name, ifindex) */ unsigned long arglist[3]; char bridge_name[IFNAMSIZ + 1]; struct ipi_bridge *br = 0; struct net_device *dev = 0; int ret; GET_ARGS(3); memset (bridge_name, 0, IFNAMSIZ + 1); GET_STRING(arglist[1], bridge_name, IFNAMSIZ); GET_BRIDGE(); BDEBUG(\"Bridge %s selected\\n\ dev = dev_get_by_index (current->nsproxy->net_ns, arglist[2]); if (dev == NULL) { return -EINVAL; } BDEBUG(\"Executing add interface %ld\\n\ if (cmd == IPIBR_ADD_IF) { ret = br_add_if (br, dev); } else { ret = br_del_if (br, dev); } dev_put (dev); return ret; }………………………………..………………………………..………………………………...}