"docker容器网络恢复分析一"

  "将sandbox通过endpoint连接到网络"

Posted by Xu on November 10, 2017

Docker容器网络恢复解析一

延续docker启动容器的网络初始化的分析思路,这里将就如何将endpoint加入到网络进行分析。 ConnectSandboxToNetwork

步骤一:构建将endpoint连接到网络network所需要的网络信息,沙盒对象及Endpoint对象

func (daemon *Daemon) connectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings, updateSettings bool) (err error) {
	start := time.Now()
	if container.HostConfig.NetworkMode.IsContainer() {//首先判断是否为容器共享网络
		return runconfig.ErrConflictSharedNetwork
	}
	if containertypes.NetworkMode(idOrName).IsBridge() &&
		daemon.configStore.DisableBridge { //判断是否为网桥模式
		container.Config.NetworkDisabled = true
		return nil
	}
	if endpointConfig == nil {
		endpointConfig = &networktypes.EndpointSettings{}//新建一个Endpoint设置结构体,存储该endpoint的配置信息
	}

	n, config, err := daemon.findAndAttachNetwork(container, idOrName, endpointConfig)//根据id或name获取网络network对象和对应的配置信息config
	if err != nil {
		return err
	}
	if n == nil {
		return nil
	}

	var operIPAM bool
	if config != nil {
		if epConfig, ok := config.EndpointsConfig[n.Name()]; ok {
			if endpointConfig.IPAMConfig == nil ||
				(endpointConfig.IPAMConfig.IPv4Address == "" &&
					endpointConfig.IPAMConfig.IPv6Address == "" &&
					len(endpointConfig.IPAMConfig.LinkLocalIPs) == 0) {
				operIPAM = true
			}

			// copy IPAMConfig and NetworkID from epConfig via AttachNetwork
			endpointConfig.IPAMConfig = epConfig.IPAMConfig
			endpointConfig.NetworkID = epConfig.NetworkID
		}
	}//将ipconfig的中得到的endpoint配置信息写入到endpointConfig结构体中去

	err = daemon.updateNetworkConfig(container, n, endpointConfig, updateSettings)//更新网络的配置
	if err != nil {
		return err
	}

	controller := daemon.netController
	sb := daemon.getNetworkSandbox(container)//获取该容器的网络沙盒对象
	createOptions, err := container.BuildCreateEndpointOptions(n, endpointConfig, sb, daemon.configStore.DNS)//构建创建Endpoint的选项结构体对象
	if err != nil {
		return err
	}

	endpointName := strings.TrimPrefix(container.Name, "/")
	ep, err := n.CreateEndpoint(endpointName, createOptions...)//根据Endpoint创建选项和名称创建Endpoint
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			if e := ep.Delete(false); e != nil {
				logrus.Warnf("Could not rollback container connection to network %s", idOrName)
			}
		}
	}()
	container.NetworkSettings.Networks[n.Name()] = &network.EndpointSettings{//配置容器的网络配置为该Endpoint的配置信息
		EndpointSettings: endpointConfig,
		IPAMOperational:  operIPAM,
	}
	if _, ok := container.NetworkSettings.Networks[n.ID()]; ok {
		delete(container.NetworkSettings.Networks, n.ID())
	}

	if err := daemon.updateEndpointNetworkSettings(container, n, ep); err != nil {//更新Endpoint的网路设置
		return err
	}

	if sb == nil {//如果该沙盒对象为空,则新建一个沙盒对并更新容器的沙盒网络配置信息
		options, err := daemon.buildSandboxOptions(container)
		if err != nil {
			return err
		}
		sb, err = controller.NewSandbox(container.ID, options...)
		if err != nil {
			return err
		}

		container.UpdateSandboxNetworkSettings(sb)
	}

	joinOptions, err := container.BuildJoinOptions(n)//构建endpoint加入沙盒的选项对象
	if err != nil {
		return err
	}

	if err := ep.Join(sb, joinOptions...); err != nil {//根据沙盒的加入选项将endpoint加入到该沙盒中,将在下面部分进行具体分析
		return err
	}

	if !container.Managed {
		// add container name/alias to DNS
		if err := daemon.ActivateContainerServiceBinding(container.Name); err != nil {//激活容器服务绑定
			return fmt.Errorf("Activate container service binding for %s failed: %v", container.Name, err)
		}
	}

	if err := container.UpdateJoinInfo(n, ep); err != nil {//更新endpoint加入信息
		return fmt.Errorf("Updating join info failed: %v", err)
	}

	container.NetworkSettings.Ports = getPortMapInfo(sb)//获取沙盒的端口映射信息

	daemon.LogNetworkEventWithAttributes(n, "connect", map[string]string{"container": container.ID})
	networkActions.WithValues("connect").UpdateSince(start)
	return nil
}

步骤二:endpoint连接沙盒sandbox,构建sandbox和endpoint之间的关联关系

其中ep.Join调用sbJoin实现如下

 
func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) error {
	n, err := ep.getNetworkFromStore()//一个Endpoint属于一个Sandbox和一个Network,用于将Sandbox连接到Network。这里获取该Endpoint所连接的网络结构体
	if err != nil {
		return fmt.Errorf("failed to get network from store during join: %v", err)
	}

	ep, err = n.getEndpointFromStore(ep.ID())//根据endpoint的id获取endpoint结构
	if err != nil {
		return fmt.Errorf("failed to get endpoint from store during join: %v", err)
	}

	ep.Lock()
	if ep.sandboxID != "" {//若该endpoint结构中的sandboxid不为空,则说明该endpoint已经附属于一个sandbox不能再次加入另一个sandbox
		ep.Unlock()
		return types.ForbiddenErrorf("another container is attached to the same network endpoint")
	}
	ep.network = n
	ep.sandboxID = sb.ID()
	ep.joinInfo = &endpointJoinInfo{}
	epid := ep.id
	ep.Unlock()
	defer func() {
		if err != nil {
			ep.Lock()
			ep.sandboxID = ""
			ep.Unlock()
		}
	}()

	nid := n.ID()

	ep.processOptions(options...)//逐个处理endpoint的加入选项函数

	d, err := n.driver(true)//获取该网络的网络驱动
	if err != nil {
		return fmt.Errorf("failed to join endpoint: %v", err)
	}

	err = d.Join(nid, epid, sb.Key(), ep, sb.Labels())//通过网络驱动将endpoint加入到指定sandbox中,这里的驱动为overlay的网络驱动,接下来的部分将会深入讲解,如何将endpoint加入到overlay网络,详见“endpoint加入overlay网络”。
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			if err := d.Leave(nid, epid); err != nil {
				logrus.Warnf("driver leave failed while rolling back join: %v", err)
			}
		}
	}()

	// Watch for service records
	if !n.getController().isAgent() {
		n.getController().watchSvcRecord(ep)
	}

	if doUpdateHostsFile(n, sb) {//更新主机文件信息
		address := ""
		if ip := ep.getFirstInterfaceAddress(); ip != nil {
			address = ip.String()
		}
		if err = sb.updateHostsFile(address); err != nil {
			return err
		}
	}
	if err = sb.updateDNS(n.enableIPv6); err != nil {
		return err//更新dns
	}

	// Current endpoint providing external connectivity for the sandbox
	extEp := sb.getGatewayEndpoint()//获取沙盒网关endpoint对象

	sb.Lock()
	heap.Push(&sb.endpoints, ep)
	sb.Unlock()
	defer func() {
		if err != nil {
			sb.removeEndpoint(ep)
		}
	}()

	if err = sb.populateNetworkResources(ep); err != nil {
		return err//根据endpoint信息收集沙盒网络资源信息
	}

	if err = n.getController().updateToStore(ep); err != nil {
		return err//获取网络控制器,并将endpoint信息更新到存储器保存
	}

	if err = ep.addDriverInfoToCluster(); err != nil {
		return err//添加驱动信息到集群
	}

	if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
		return sb.setupDefaultGW()
	}

	moveExtConn := sb.getGatewayEndpoint() != extEp

	if moveExtConn {//移除endpoint的连接
		if extEp != nil {
			logrus.Debugf("Revoking external connectivity on endpoint %s (%s)", extEp.Name(), extEp.ID())
			extN, err := extEp.getNetworkFromStore()
			if err != nil {
				return fmt.Errorf("failed to get network from store during join: %v", err)
			}
			extD, err := extN.driver(true)
			if err != nil {
				return fmt.Errorf("failed to join endpoint: %v", err)
			}
			if err = extD.RevokeExternalConnectivity(extEp.network.ID(), extEp.ID()); err != nil {
				return types.InternalErrorf(
					"driver failed revoking external connectivity on endpoint %s (%s): %v",
					extEp.Name(), extEp.ID(), err)
			}
			defer func() {
				if err != nil {
					if e := extD.ProgramExternalConnectivity(extEp.network.ID(), extEp.ID(), sb.Labels()); e != nil {
						logrus.Warnf("Failed to roll-back external connectivity on endpoint %s (%s): %v",
							extEp.Name(), extEp.ID(), e)
					}
				}
			}()
		}
		if !n.internal {
			logrus.Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
			if err = d.ProgramExternalConnectivity(n.ID(), ep.ID(), sb.Labels()); err != nil {
				return types.InternalErrorf(
					"driver failed programming external connectivity on endpoint %s (%s): %v",
					ep.Name(), ep.ID(), err)
			}
		}

	}

	if !sb.needDefaultGW() {
		if err := sb.clearDefaultGW(); err != nil {
			logrus.Warnf("Failure while disconnecting sandbox %s (%s) from gateway network: %v",
				sb.ID(), sb.ContainerID(), err)
		}
	}

	return nil
}

步骤三:根据网络驱动将Sandbox通过Endpoint加入overlay网络

// Join method is invoked when a Sandbox is attached to an endpoint.
func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, options map[string]interface{}) error {
	if err := validateID(nid, eid); err != nil {//验证网络id和endpoint id
		return err
	}

	n := d.network(nid)//获取网络结构体
	if n == nil {
		return fmt.Errorf("could not find network with id %s", nid)
	}

	ep := n.endpoint(eid)//获取endpoint对应结构体
	if ep == nil {
		return fmt.Errorf("could not find endpoint with id %s", eid)
	}

	if n.secure && len(d.keys) == 0 {
		return fmt.Errorf("cannot join secure network: encryption keys not present")
	}

	nlh := ns.NlHandle()//获取netlink处理器

	if n.secure && !nlh.SupportsNetlinkFamily(syscall.NETLINK_XFRM) {
		return fmt.Errorf("cannot join secure network: required modules to install IPSEC rules are missing on host")
	}

	s := n.getSubnetforIP(ep.addr)//获取子网ip
	if s == nil {
		return fmt.Errorf("could not find subnet for endpoint %s", eid)
	}

	if err := n.obtainVxlanID(s); err != nil {
	//获取虚拟扩展网络id
		return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err)
	}

	if err := n.joinSandbox(false); err != nil {
	//连接网络和sandbox,如何连接沙盒和网络,这里涉及到沙盒的初始化过程,我们将在下一章进行详细的分析
		return fmt.Errorf("network sandbox join failed: %v", err)
	}

	if err := n.joinSubnetSandbox(s, false); err != nil {
	//连接子网沙盒到网络
		return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err)
	}

	// joinSubnetSandbox gets called when an endpoint comes up on a new subnet in the
	// overlay network. Hence the Endpoint count should be updated outside joinSubnetSandbox
	n.incEndpointCount()//增加网络endpoint的个数

	sbox := n.sandbox()//获取网络的沙盒对象

	overlayIfName, containerIfName, err := createVethPair()//创建虚拟网卡对,得到overlayIfName(overlay网络端虚拟网卡名称)和containerIfName(容器端虚拟网卡名称)
	if err != nil {
		return err
	}

	ep.ifName = containerIfName//设置endpoint在容器这端的虚拟网卡名称

	if err := d.writeEndpointToStore(ep); err != nil {//将endpoint对象存储在store中去
		return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
	}

	// Set the container interface and its peer MTU to 1450 to allow
	// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
	// outer UDP(8) + vxlan header(8))
	mtu := n.maxMTU()//获取最大传输单元

	veth, err := nlh.LinkByName(overlayIfName)//通过overlay端虚拟网卡的名称获取网卡对象
	if err != nil {
		return fmt.Errorf("cound not find link by name %s: %v", overlayIfName, err)
	}
	err = nlh.LinkSetMTU(veth, mtu)//设置该网卡的最大传输单元
	if err != nil {
		return err
	}

	if err := sbox.AddInterface(overlayIfName, "veth",
		sbox.InterfaceOptions().Master(s.brName)); err != nil {
		return fmt.Errorf("could not add veth pair inside the network sandbox: %v", err)
	}//添加虚拟网卡到该沙盒中去

	veth, err = nlh.LinkByName(containerIfName)//获取容器端的虚拟网卡对象
	if err != nil {
		return fmt.Errorf("could not find link by name %s: %v", containerIfName, err)
	}
	err = nlh.LinkSetMTU(veth, mtu)//同样设置网卡的最大传输单元
	if err != nil {
		return err
	}

	if err := nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
		return fmt.Errorf("could not set mac address (%v) to the container interface: %v", ep.mac, err)
	}//给这个容器的虚拟网卡设置物理mac地址

	for _, sub := range n.subnets {//添加overlay网络中的子网subnet的静态路由信息
		if sub == s {
			continue
		}
		if err := jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
			logrus.Errorf("Adding subnet %s static route in network %q failed\n", s.subnetIP, n.id)
		}
	}

	if iNames := jinfo.InterfaceName(); iNames != nil {
		err = iNames.SetNames(containerIfName, "eth")
		if err != nil {
			return err
		}
	}//设置该endpoint的srcName(容器端虚拟网卡的名称),dstPrefix(“eth”)

	d.peerDbAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac,
		net.ParseIP(d.advertiseAddress), true)//构建网卡对的映射关系pmap

	if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
		logrus.Warn(err)
	}

	buf, err := proto.Marshal(&PeerRecord{
		EndpointIP:       ep.addr.String(),
		EndpointMAC:      ep.mac.String(),
		TunnelEndpointIP: d.advertiseAddress,
	})
	if err != nil {
		return err
	}

	if err := jinfo.AddTableEntry(ovPeerTable, eid, buf); err != nil {
		logrus.Errorf("overlay: Failed adding table entry to joininfo: %v", err)
	}//添加表项连接信息

	d.pushLocalEndpointEvent("join", nid, eid)//通知本地endpoint连接事件信息

	return nil
}