"docker容器创建网络源码分析"

  "docker容器网络创建"

Posted by Xu on November 6, 2017

Docker网络创建源码分析

在容器热迁移的过程中,网络的恢复最为复杂,我们需要保证在容器迁移到目的机之后的网络ip不发生变化,才能让客户端正常访问相应的服务。这里我们对Docker容器的网络创建过程做一个细致的整理分析,以便实现容器网络的迁移

从docker客户端post请求到docker daemon进行路由,我们找到网络创建函数postNetworkCreate:

创建过程如图所示: CreateNetwork

func (n *networkRouter) postNetworkCreate(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	var create types.NetworkCreateRequest

	if err := httputils.ParseForm(r); err != nil {
		return err
	}

	if err := httputils.CheckForJSON(r); err != nil {
		return err
	}

	if err := json.NewDecoder(r.Body).Decode(&create); err != nil {
		return err
	}
    //解析请求数据,检验json格式,并将请求json数据进行解析得到NetworkCreateRequest
	if nws, err := n.cluster.GetNetworksByName(create.Name); err == nil && len(nws) > 0 {
	//首先从集群中根据网络名称获取网络
		return libnetwork.NetworkNameError(create.Name)
	}

	nw, err := n.backend.CreateNetwork(create)
	if err != nil {
		if _, ok := err.(libnetwork.ManagerRedirectError); !ok {
			return err
		}
		id, err := n.cluster.CreateNetwork(create)
		if err != nil {
			return err
		}
		nw = &types.NetworkCreateResponse{ID: id}
	}

	return httputils.WriteJSON(w, http.StatusCreated, nw)
}

从集群中根据网络名称获取网络:GetNetworksByName->getNetworks

func (c *Cluster) getNetworks(filters *swarmapi.ListNetworksRequest_Filters) ([]apitypes.NetworkResource, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()//获得目前节点状态
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := state.controlClient.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: filters})//根据名称Name获得网络
	if err != nil {
		return nil, err
	}

	var networks []apitypes.NetworkResource

	for _, network := range r.Networks {//将所有满足条件的network添加到networks中去
		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
	}

	return networks, nil
}

根据创建请求数据初步创建网络

主要是准备一些网络可选项信息,并判断网络是否合法有效,或者已经存在 CreateNetWork(create)->CreateNetWork(create,”“,false)

func (daemon *Daemon) createNetwork(create types.NetworkCreateRequest, id string, agent bool) (*types.NetworkCreateResponse, error) {
	// If there is a pending ingress network creation wait here
	// since ingress network creation can happen via node download
	// from manager or task download.
	if isIngressNetwork(create.Name) {//看是否是一个等待进入的网络
		defer ingressWait()()
	}

	if runconfig.IsPreDefinedNetwork(create.Name) && !agent {
	//判断是否是一个提前定义的网络
		err := fmt.Errorf("%s is a pre-defined network and cannot be created", create.Name)
		return nil, apierrors.NewRequestForbiddenError(err)
	}

	var warning string
	nw, err := daemon.GetNetworkByName(create.Name)//根据网络名称获取网络,看是否已经存在该网络
	if err != nil {
		if _, ok := err.(libnetwork.ErrNoSuchNetwork); !ok {
			return nil, err
		}
	}
	if nw != nil {
		if create.CheckDuplicate {
			return nil, libnetwork.NetworkNameError(create.Name)
		}
		warning = fmt.Sprintf("Network with name %s (id : %s) already exists", nw.Name(), nw.ID())
	}

	c := daemon.netController
	driver := create.Driver
	if driver == "" {
		driver = c.Config().Daemon.DefaultDriver
	}

	nwOptions := []libnetwork.NetworkOption{
		libnetwork.NetworkOptionEnableIPv6(create.EnableIPv6),
		libnetwork.NetworkOptionDriverOpts(create.Options),
		libnetwork.NetworkOptionLabels(create.Labels),
		libnetwork.NetworkOptionAttachable(create.Attachable),
	}//根据相关信息填写网络选项信息

	if create.IPAM != nil {//网络创建选项中是否有ipam网络地址管理器选项
		ipam := create.IPAM
		v4Conf, v6Conf, err := getIpamConfig(ipam.Config)
		if err != nil {
			return nil, err
		}
		nwOptions = append(nwOptions, libnetwork.NetworkOptionIpam(ipam.Driver, "", v4Conf, v6Conf, ipam.Options))
	}

	if create.Internal {
		nwOptions = append(nwOptions, libnetwork.NetworkOptionInternalNetwork())
	}
	if agent {
		nwOptions = append(nwOptions, libnetwork.NetworkOptionDynamic())
		nwOptions = append(nwOptions, libnetwork.NetworkOptionPersist(false))
	}

	if isIngressNetwork(create.Name) {
		nwOptions = append(nwOptions, libnetwork.NetworkOptionIngress())
	}
//以上均为根据create中的信息填写网络创建选项nwOptions
	n, err := c.NewNetwork(driver, create.Name, id, nwOptions...)//根据网络驱动,网络名称和网络创建可选项来创建网络
	if err != nil {
		return nil, err
	}

	daemon.pluginRefCount(driver, driverapi.NetworkPluginEndpointType, plugingetter.Acquire)
	if create.IPAM != nil {
		daemon.pluginRefCount(create.IPAM.Driver, ipamapi.PluginEndpointType, plugingetter.Acquire)
	}
	daemon.LogNetworkEvent(n, "create")

	return &types.NetworkCreateResponse{
		ID:      n.ID(),
		Warning: warning,
	}, nil
}

进一步实现网络的创建的公共实现(不涉及到网络类型之间的区别)

根据网络可选项逐个进行函数处理,然后生成网络id,获取网络驱动,,然后将endpointCount及network对象存入KV数据库

func (c *controller) NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error) {
	if id != "" {//这里的id为空
		c.networkLocker.Lock(id)
		defer c.networkLocker.Unlock(id)

		if _, err := c.NetworkByID(id); err == nil {
			return nil, NetworkNameError(id)
		}
	}

	if !config.IsValidName(name) {
		return nil, ErrInvalidName(name)
	}

	if id == "" {
		id = stringid.GenerateRandomID()//随机产生一个网络id
	}

	defaultIpam := defaultIpamForNetworkType(networkType)//根据网络类型生成默认网络ip地址管理器
	// Construct the network object
	network := &network{
		name:        name,
		networkType: networkType,
		generic:     map[string]interface{}{netlabel.GenericData: make(map[string]string)},
		ipamType:    defaultIpam,
		id:          id,
		created:     time.Now(),
		ctrlr:       c,
		persist:     true,
		drvOnce:     &sync.Once{},
	}//构造网络对象

	network.processOptions(options...)//逐个处理网络可选项nwOption信息

	_, cap, err := network.resolveDriver(networkType, true)//从网络驱动缓存中根据网络类型来获取对应驱动,及容量,true代表缓存中没有时可以尝试加载驱动。
	if err != nil {
		return nil, err
	}

	if cap.DataScope == datastore.GlobalScope && !c.isDistributedControl() && !network.dynamic {
		if c.isManager() {
			// For non-distributed controlled environment, globalscoped non-dynamic networks are redirected to Manager
			return nil, ManagerRedirectError(name)
		}

		return nil, types.ForbiddenErrorf("Cannot create a multi-host network from a worker node. Please create the network from a manager node.")
	}

	// Make sure we have a driver available for this network type
	// before we allocate anything.
	if _, err := network.driver(true); err != nil {//获取这个网络的驱动
		return nil, err
	}

	err = network.ipamAllocate()//给网络分配地址管理器
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			network.ipamRelease()
		}
	}()

	err = c.addNetwork(network)//添加网络到网络控制器中,并根据网络类型创建对应的网络,添加到相应驱动的网络缓存中
	if err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			if e := network.deleteNetwork(); e != nil {
				logrus.Warnf("couldn't roll back driver network on network %s creation failure: %v", network.name, err)
			}
		}
	}()

	// First store the endpoint count, then the network. To avoid to
	// end up with a datastore containing a network and not an epCnt,
	// in case of an ungraceful shutdown during this function call.
	epCnt := &endpointCnt{n: network}
	if err = c.updateToStore(epCnt); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			if e := c.deleteFromStore(epCnt); e != nil {
				logrus.Warnf("could not rollback from store, epCnt %v on failure (%v): %v", epCnt, err, e)
			}
		}
	}()

	network.epCnt = epCnt
	if err = c.updateToStore(network); err != nil {
		return nil, err
	}//将network信息及endpoint count信息更新到数据库
	joinCluster(network)//将网络尝试加入集群
	if !c.isDistributedControl() {
		arrangeIngressFilterRule()
	}

	return network, nil
}

在集群中创建网络

在集群中创建网络

// CreateNetwork creates a new cluster managed network.
func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()//目前节点的状态
	if !state.IsActiveManager() {
		return "", c.errNoManager(state)
	}

	if runconfig.IsPreDefinedNetwork(s.Name) {//是否为提前定义的网络
		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
		return "", apierrors.NewRequestForbiddenError(err)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	networkSpec := convert.BasicNetworkCreateToGRPC(s)//根据网络请求得到网络配置相关信息
	r, err := state.controlClient.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})//根据网络配置创建网络
	if err != nil {
		return "", err
	}

	return r.Network.ID, nil
}
func (s *Server) CreateNetwork(ctx context.Context, request *api.CreateNetworkRequest) (*api.CreateNetworkResponse, error) {
	// if you change this function, you have to change createInternalNetwork in
	// the tests to match it (except the part where we check the label).
	if err := validateNetworkSpec(request.Spec, s.pg); err != nil {
		return nil, err
	}

	if _, ok := request.Spec.Annotations.Labels["com.docker.swarm.internal"]; ok {
		return nil, grpc.Errorf(codes.PermissionDenied, "label com.docker.swarm.internal is for predefined internal networks and cannot be applied by users")
	}

	// TODO(mrjana): Consider using `Name` as a primary key to handle
	// duplicate creations. See #65
	n := &api.Network{
		ID:   identity.NewID(),
		Spec: *request.Spec,
	}

	err := s.store.Update(func(tx store.Tx) error {//将网络信息存储到网络存储器boltdb中
		return store.CreateNetwork(tx, n)
	})
	if err != nil {
		return nil, err
	}

	return &api.CreateNetworkResponse{
		Network: n,
	}, nil
}

不同网络类型的网络创建具体实现

进入接口,根据网络驱动的类型调用不同的网络创建函数,然后创建得到不同类型的network写入数据库,然后添加到驱动的网络缓存d.networks中去

这里假设我们添加的网络类型为Overlay,则如下所示

func (c *controller) addNetwork(n *network) error {
	d, err := n.driver(true)
	if err != nil {
		return err
	}

	// Create the network
	if err := d.CreateNetwork(n.id, n.generic, n, n.getIPData(4), n.getIPData(6)); err != nil {
		return err
	}

	n.startResolver()

	return nil
}

//其中Overlay中d.CreateNetWork()实现如下:获取网络的通用数据,解析得到虚拟扩展网络vxlan中的虚拟网络id:vnis,还有安全选项,驱动的MTU(最大传输单元),然后填充子网信息,最后将网络network添加到驱动缓存
func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
	if id == "" {
		return fmt.Errorf("invalid network id")
	}
	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
		return types.BadRequestErrorf("ipv4 pool is empty")
	}

	// Since we perform lazy configuration make sure we try
	// configuring the driver when we enter CreateNetwork
	if err := d.configure(); err != nil {
		return err
	}

	n := &network{
		id:        id,
		driver:    d,
		endpoints: endpointTable{},
		once:      &sync.Once{},
		subnets:   []*subnet{},
	}

	vnis := make([]uint32, 0, len(ipV4Data))
	if gval, ok := option[netlabel.GenericData]; ok {
	//得到该网络可选项中的通用数据:OverlayIDList,secureOption,DriverMTU
		optMap := gval.(map[string]string)
		if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
			logrus.Debugf("overlay: Received vxlan IDs: %s", val)
			vniStrings := strings.Split(val, ",")
			for _, vniStr := range vniStrings {
				vni, err := strconv.Atoi(vniStr)
				if err != nil {
					return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
				}

				vnis = append(vnis, uint32(vni))
			}
		}//根据该网络对象中 虚拟化扩展网络vxlan(两层vlan)对象来获取所有Overlay中的虚拟网络id:vnis
		if _, ok := optMap[secureOption]; ok {
			n.secure = true
		}
		if val, ok := optMap[netlabel.DriverMTU]; ok {
			var err error
			if n.mtu, err = strconv.Atoi(val); err != nil {
				return fmt.Errorf("failed to parse %v: %v", val, err)
			}
			if n.mtu < 0 {
				return fmt.Errorf("invalid MTU value: %v", n.mtu)
			}
		}
	}//根据网络可选项配置信息来配置新建的网络对象network

	// If we are getting vnis from libnetwork, either we get for
	// all subnets or none.
	if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
	}

	for i, ipd := range ipV4Data {//遍历ipV4Data来填充子网信息
		s := &subnet{
			subnetIP: ipd.Pool,
			gwIP:     ipd.Gateway,
			once:     &sync.Once{},
		}

		if len(vnis) != 0 {
			s.vni = vnis[i]
		}

		n.subnets = append(n.subnets, s)
	}

	if err := n.writeToStore(); err != nil {//将网络信息写入数据库
		return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
	}

	// Make sure no rule is on the way from any stale secure network
	if !n.secure {
		for _, vni := range vnis {
			programMangle(vni, false)
		}
	}

	if nInfo != nil {
		if err := nInfo.TableEventRegister(ovPeerTable); err != nil {
			return err
		}
	}

	d.addNetwork(n)//将网络id和网络network的映射关系添加到d.networks缓存中去
	return nil
}