/* Copyright 2019 Stellar Project Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package server import ( "bufio" "bytes" "context" "fmt" "io/ioutil" "net" "net/url" "path/filepath" "runtime" "runtime/pprof" "strings" "time" ping "github.com/digineo/go-ping" "github.com/gogo/protobuf/proto" ptypes "github.com/gogo/protobuf/types" "github.com/gomodule/redigo/redis" "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stellarproject/heimdall" v1 "github.com/stellarproject/heimdall/api/v1" "github.com/stellarproject/heimdall/client" "github.com/stellarproject/heimdall/version" "github.com/stellarproject/heimdall/wg" "google.golang.org/grpc" ) const ( masterKey = "heimdall:master" clusterKey = "heimdall:key" keypairsKey = "heimdall:keypairs" nodesKey = "heimdall:nodes" nodeJoinKey = "heimdall:join" peersKey = "heimdall:peers" routesKey = "heimdall:routes" peerIPsKey = "heimdall:peerips" nodeIPsKey = "heimdall:nodeips" nodeNetworksKey = "heimdall:nodenetworks" authorizedPeersKey = "heimdall:authorized" wireguardConfigDir = "/etc/wireguard" ) var ( empty = &ptypes.Empty{} masterHeartbeatInterval = time.Second * 5 nodeHeartbeatInterval = time.Second * 15 nodeHeartbeatExpiry = 86400 peerConfigUpdateInterval = time.Second * 10 // ErrRouteExists is returned when a requested route is already reserved ErrRouteExists = errors.New("route already reserved") // ErrNodeDoesNotExist is returned when an invalid node is requested ErrNodeDoesNotExist = errors.New("node does not exist") ) // Server represents the Heimdall server type Server struct { cfg *heimdall.Config rpool *redis.Pool wpool *redis.Pool replicaCh chan struct{} } // NewServer returns a new Heimdall server func NewServer(cfg *heimdall.Config) (*Server, error) { pool, err := getPool(cfg.RedisURL) if err != nil { return nil, err } return &Server{ cfg: cfg, rpool: pool, wpool: pool, replicaCh: make(chan struct{}, 1), }, nil } // Register enables callers to register this service with an existing GRPC server func (s *Server) Register(server *grpc.Server) error { v1.RegisterHeimdallServer(server, s) return nil } // GenerateProfile generates a new Go profile func (s *Server) GenerateProfile() (string, error) { tmpfile, err := ioutil.TempFile("", "heimdall-profile-") if err != nil { return "", err } runtime.GC() if err := pprof.WriteHeapProfile(tmpfile); err != nil { return "", err } tmpfile.Close() return tmpfile.Name(), nil } func (s *Server) Run() error { ctx := context.Background() // check peer address and make a grpc request for master info if present if s.cfg.GRPCPeerAddress != "" { logrus.Debugf("joining %s", s.cfg.GRPCPeerAddress) c, err := s.getClient(s.cfg.GRPCPeerAddress) if err != nil { return err } defer c.Close() r, err := c.Join(&v1.JoinRequest{ ID: s.cfg.ID, ClusterKey: s.cfg.ClusterKey, GRPCAddress: s.cfg.GRPCAddress, EndpointIP: s.cfg.EndpointIP, EndpointPort: uint64(s.cfg.EndpointPort), InterfaceName: s.cfg.InterfaceName, }) if err != nil { return err } logrus.Debugf("master info received: id=%s grpc=%s", r.Master.ID, r.Master.GRPCAddress) // start tunnel if err := s.updatePeerConfig(ctx, r.Node, r.Peers); err != nil { return errors.Wrap(err, "error updating peer config") } // wait for tunnel to come up logrus.Infof("waiting for master %s", r.Master.ID) if err := s.waitForMaster(ctx, r.Master); err != nil { return errors.Wrap(err, "error waiting for master") } logrus.Debugf("joining master: %s", r.Master.ID) if err := s.joinMaster(r.Master); err != nil { return err } logrus.Infof("waiting for redis sync with %s", r.Master.ID) if err := s.waitForRedisSync(ctx); err != nil { return err } go s.replicaMonitor() } else { if err := s.configureNode(); err != nil { return err } } // ensure keypair if _, err := s.getOrCreateKeyPair(ctx, s.cfg.ID); err != nil { return err } // ensure node network subnet if err := s.ensureNetworkSubnet(ctx, s.cfg.ID); err != nil { return err } // initial node update if err := s.updateLocalNodeInfo(ctx); err != nil { return err } // start node heartbeat to update in redis go s.updateNodeInfo(ctx) // initial peer info update if err := s.updatePeerInfo(ctx, s.cfg.ID); err != nil { return err } // initial config update node, err := s.getNode(ctx, s.cfg.ID) if err != nil { return err } peers, err := s.getPeers(ctx) if err != nil { return err } if err := s.updatePeerConfig(ctx, node, peers); err != nil { return err } // start peer config updater to configure wireguard as peers join go s.peerUpdater(ctx) // start listener for pub/sub errCh := make(chan error, 1) go func() { c := s.rpool.Get() defer c.Close() psc := redis.PubSubConn{Conn: c} psc.Subscribe(nodeJoinKey) for { switch v := psc.Receive().(type) { case redis.Message: // TODO: handle join notify logrus.Debug("join notify") case redis.Subscription: default: logrus.Debugf("unknown message type %T", v) } } }() err = <-errCh return err } func (s *Server) Stop() error { s.rpool.Close() s.wpool.Close() return nil } func getPool(redisUrl string) (*redis.Pool, error) { pool := redis.NewPool(func() (redis.Conn, error) { conn, err := redis.DialURL(redisUrl) if err != nil { return nil, errors.Wrap(err, "unable to connect to redis") } u, err := url.Parse(redisUrl) if err != nil { return nil, err } auth, ok := u.User.Password() if ok { if _, err := conn.Do("CONFIG", "SET", "MASTERAUTH", auth); err != nil { return nil, errors.Wrap(err, "error authenticating to redis") } } return conn, nil }, 10) return pool, nil } func (s *Server) waitForMaster(ctx context.Context, m *v1.Master) error { p, err := ping.New("0.0.0.0", "") if err != nil { return err } defer p.Close() doneCh := make(chan time.Duration) errCh := make(chan error) go func() { for { ip, err := net.ResolveIPAddr("ip4", m.GatewayIP) if err != nil { errCh <- err return } rtt, err := p.Ping(ip, time.Second*30) if err != nil { errCh <- err } doneCh <- rtt } }() select { case rtt := <-doneCh: logrus.Debugf("rtt master ping: %s", rtt) return nil case err := <-errCh: return err } return nil } func (s *Server) waitForRedisSync(ctx context.Context) error { doneCh := make(chan bool) errCh := make(chan error) go func() { for { info, err := redis.String(s.local(ctx, "INFO", "REPLICATION")) if err != nil { logrus.Warn(err) continue } b := bytes.NewBufferString(info) s := bufio.NewScanner(b) for s.Scan() { v := s.Text() parts := strings.SplitN(v, ":", 2) if parts[0] == "master_link_status" { if parts[1] == "up" { doneCh <- true return } } } time.Sleep(time.Second * 1) } }() select { case <-doneCh: return nil case err := <-errCh: return err case <-time.After(nodeHeartbeatInterval): return fmt.Errorf("timeout waiting on sync") } } func (s *Server) ensureNetworkSubnet(ctx context.Context, id string) error { network, err := redis.String(s.local(ctx, "GET", s.getNodeNetworkKey(id))) if err != nil { if err != redis.ErrNil { return err } // allocate initial node subnet r, err := parseSubnetRange(s.cfg.NodeNetwork) if err != nil { return err } // iterate node networks to find first free nodeNetworkKeys, err := redis.Strings(s.local(ctx, "KEYS", s.getNodeNetworkKey("*"))) if err != nil { return err } lookup := map[string]struct{}{} for _, netKey := range nodeNetworkKeys { n, err := redis.String(s.local(ctx, "GET", netKey)) if err != nil { return err } lookup[n] = struct{}{} } subnet := r.Subnet size, _ := subnet.Mask.Size() for { n, ok := nextSubnet(subnet, size) if !ok { return fmt.Errorf("error getting next subnet") } if _, exists := lookup[n.String()]; exists { subnet = n continue } logrus.Debugf("allocated network %s for %s", n.String(), id) if err := s.updateNodeNetwork(ctx, id, n.String()); err != nil { return err } break } return nil } logrus.Debugf("node network for %s: %s", id, network) return nil } func (s *Server) getOrCreateKeyPair(ctx context.Context, id string) (*v1.KeyPair, error) { key := s.getKeyPairKey(id) keyData, err := redis.Bytes(s.master(ctx, "GET", key)) if err != nil { if err != redis.ErrNil { return nil, err } logrus.Debugf("generating new keypair for %s", s.cfg.ID) privateKey, publicKey, err := wg.GenerateWireguardKeys(ctx) if err != nil { return nil, err } keyPair := &v1.KeyPair{ PrivateKey: privateKey, PublicKey: publicKey, } data, err := proto.Marshal(keyPair) if err != nil { return nil, err } if _, err := s.master(ctx, "SET", key, data); err != nil { return nil, err } return keyPair, nil } var keyPair v1.KeyPair if err := proto.Unmarshal(keyData, &keyPair); err != nil { return nil, err } return &keyPair, nil } func (s *Server) getNodeKey(id string) string { return fmt.Sprintf("%s:%s", nodesKey, id) } func (s *Server) getRouteKey(network string) string { return fmt.Sprintf("%s:%s", routesKey, network) } func (s *Server) getPeerKey(id string) string { return fmt.Sprintf("%s:%s", peersKey, id) } func (s *Server) getKeyPairKey(id string) string { return fmt.Sprintf("%s:%s", keypairsKey, id) } func (s *Server) getNodeNetworkKey(id string) string { return fmt.Sprintf("%s:%s", nodeNetworksKey, id) } func (s *Server) getClient(addr string) (*client.Client, error) { cfg := &heimdall.Config{ TLSClientCertificate: s.cfg.TLSClientCertificate, TLSClientKey: s.cfg.TLSClientKey, TLSInsecureSkipVerify: s.cfg.TLSInsecureSkipVerify, } opts, err := client.DialOptionsFromConfig(cfg) if err != nil { return nil, err } opts = append(opts, grpc.WithBlock(), grpc.WithUserAgent(fmt.Sprintf("%s/%s", version.Name, version.Version)), ) return client.NewClient(s.cfg.ID, addr, opts...) } func (s *Server) getClusterKey(ctx context.Context) (string, error) { return redis.String(s.local(ctx, "GET", clusterKey)) } func (s *Server) getWireguardConfigPath() string { return filepath.Join(wireguardConfigDir, s.cfg.InterfaceName+".conf") } func (s *Server) getTunnelName() string { return s.cfg.InterfaceName } func (s *Server) local(ctx context.Context, cmd string, args ...interface{}) (interface{}, error) { return s.do(ctx, s.rpool, cmd, args...) } func (s *Server) master(ctx context.Context, cmd string, args ...interface{}) (interface{}, error) { return s.do(ctx, s.wpool, cmd, args...) } func (s *Server) do(ctx context.Context, pool *redis.Pool, cmd string, args ...interface{}) (interface{}, error) { conn, err := pool.GetContext(ctx) if err != nil { return nil, err } defer conn.Close() r, err := conn.Do(cmd, args...) return r, err }