mirror of
https://github.com/vbatts/imgsrv.git
synced 2025-01-08 05:17:08 +00:00
616 lines
17 KiB
Go
616 lines
17 KiB
Go
// mgo - MongoDB driver for Go
|
|
//
|
|
// Copyright (c) 2010-2012 - Gustavo Niemeyer <gustavo@niemeyer.net>
|
|
//
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright notice, this
|
|
// list of conditions and the following disclaimer.
|
|
// 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
package mgo
|
|
|
|
import (
|
|
"errors"
|
|
"labix.org/v2/mgo/bson"
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Mongo cluster encapsulation.
|
|
//
|
|
// A cluster enables the communication with one or more servers participating
|
|
// in a mongo cluster. This works with individual servers, a replica set,
|
|
// a replica pair, one or multiple mongos routers, etc.
|
|
|
|
type mongoCluster struct {
|
|
sync.RWMutex
|
|
serverSynced sync.Cond
|
|
userSeeds []string
|
|
dynaSeeds []string
|
|
servers mongoServers
|
|
masters mongoServers
|
|
references int
|
|
syncing bool
|
|
direct bool
|
|
failFast bool
|
|
syncCount uint
|
|
cachedIndex map[string]bool
|
|
sync chan bool
|
|
dial dialer
|
|
}
|
|
|
|
func newCluster(userSeeds []string, direct, failFast bool, dial dialer) *mongoCluster {
|
|
cluster := &mongoCluster{
|
|
userSeeds: userSeeds,
|
|
references: 1,
|
|
direct: direct,
|
|
failFast: failFast,
|
|
dial: dial,
|
|
}
|
|
cluster.serverSynced.L = cluster.RWMutex.RLocker()
|
|
cluster.sync = make(chan bool, 1)
|
|
stats.cluster(+1)
|
|
go cluster.syncServersLoop()
|
|
return cluster
|
|
}
|
|
|
|
// Acquire increases the reference count for the cluster.
|
|
func (cluster *mongoCluster) Acquire() {
|
|
cluster.Lock()
|
|
cluster.references++
|
|
debugf("Cluster %p acquired (refs=%d)", cluster, cluster.references)
|
|
cluster.Unlock()
|
|
}
|
|
|
|
// Release decreases the reference count for the cluster. Once
|
|
// it reaches zero, all servers will be closed.
|
|
func (cluster *mongoCluster) Release() {
|
|
cluster.Lock()
|
|
if cluster.references == 0 {
|
|
panic("cluster.Release() with references == 0")
|
|
}
|
|
cluster.references--
|
|
debugf("Cluster %p released (refs=%d)", cluster, cluster.references)
|
|
if cluster.references == 0 {
|
|
for _, server := range cluster.servers.Slice() {
|
|
server.Close()
|
|
}
|
|
// Wake up the sync loop so it can die.
|
|
cluster.syncServers()
|
|
stats.cluster(-1)
|
|
}
|
|
cluster.Unlock()
|
|
}
|
|
|
|
func (cluster *mongoCluster) LiveServers() (servers []string) {
|
|
cluster.RLock()
|
|
for _, serv := range cluster.servers.Slice() {
|
|
servers = append(servers, serv.Addr)
|
|
}
|
|
cluster.RUnlock()
|
|
return servers
|
|
}
|
|
|
|
func (cluster *mongoCluster) removeServer(server *mongoServer) {
|
|
cluster.Lock()
|
|
cluster.masters.Remove(server)
|
|
other := cluster.servers.Remove(server)
|
|
cluster.Unlock()
|
|
if other != nil {
|
|
other.Close()
|
|
log("Removed server ", server.Addr, " from cluster.")
|
|
}
|
|
server.Close()
|
|
}
|
|
|
|
type isMasterResult struct {
|
|
IsMaster bool
|
|
Secondary bool
|
|
Primary string
|
|
Hosts []string
|
|
Passives []string
|
|
Tags bson.D
|
|
Msg string
|
|
}
|
|
|
|
func (cluster *mongoCluster) isMaster(socket *mongoSocket, result *isMasterResult) error {
|
|
// Monotonic let's it talk to a slave and still hold the socket.
|
|
session := newSession(Monotonic, cluster, 10*time.Second)
|
|
session.setSocket(socket)
|
|
err := session.Run("ismaster", result)
|
|
session.Close()
|
|
return err
|
|
}
|
|
|
|
type possibleTimeout interface {
|
|
Timeout() bool
|
|
}
|
|
|
|
var syncSocketTimeout = 5 * time.Second
|
|
|
|
func (cluster *mongoCluster) syncServer(server *mongoServer) (info *mongoServerInfo, hosts []string, err error) {
|
|
var syncTimeout time.Duration
|
|
if raceDetector {
|
|
// This variable is only ever touched by tests.
|
|
globalMutex.Lock()
|
|
syncTimeout = syncSocketTimeout
|
|
globalMutex.Unlock()
|
|
} else {
|
|
syncTimeout = syncSocketTimeout
|
|
}
|
|
|
|
addr := server.Addr
|
|
log("SYNC Processing ", addr, "...")
|
|
|
|
// Retry a few times to avoid knocking a server down for a hiccup.
|
|
var result isMasterResult
|
|
var tryerr error
|
|
for retry := 0; ; retry++ {
|
|
if retry == 3 || retry == 1 && cluster.failFast {
|
|
return nil, nil, tryerr
|
|
}
|
|
if retry > 0 {
|
|
// Don't abuse the server needlessly if there's something actually wrong.
|
|
if err, ok := tryerr.(possibleTimeout); ok && err.Timeout() {
|
|
// Give a chance for waiters to timeout as well.
|
|
cluster.serverSynced.Broadcast()
|
|
}
|
|
time.Sleep(syncShortDelay)
|
|
}
|
|
|
|
// It's not clear what would be a good timeout here. Is it
|
|
// better to wait longer or to retry?
|
|
socket, _, err := server.AcquireSocket(0, syncTimeout)
|
|
if err != nil {
|
|
tryerr = err
|
|
logf("SYNC Failed to get socket to %s: %v", addr, err)
|
|
continue
|
|
}
|
|
err = cluster.isMaster(socket, &result)
|
|
socket.Release()
|
|
if err != nil {
|
|
tryerr = err
|
|
logf("SYNC Command 'ismaster' to %s failed: %v", addr, err)
|
|
continue
|
|
}
|
|
debugf("SYNC Result of 'ismaster' from %s: %#v", addr, result)
|
|
break
|
|
}
|
|
|
|
if result.IsMaster {
|
|
debugf("SYNC %s is a master.", addr)
|
|
// Made an incorrect assumption above, so fix stats.
|
|
stats.conn(-1, false)
|
|
stats.conn(+1, true)
|
|
} else if result.Secondary {
|
|
debugf("SYNC %s is a slave.", addr)
|
|
} else if cluster.direct {
|
|
logf("SYNC %s in unknown state. Pretending it's a slave due to direct connection.", addr)
|
|
} else {
|
|
logf("SYNC %s is neither a master nor a slave.", addr)
|
|
// Made an incorrect assumption above, so fix stats.
|
|
stats.conn(-1, false)
|
|
return nil, nil, errors.New(addr + " is not a master nor slave")
|
|
}
|
|
|
|
info = &mongoServerInfo{
|
|
Master: result.IsMaster,
|
|
Mongos: result.Msg == "isdbgrid",
|
|
Tags: result.Tags,
|
|
}
|
|
|
|
hosts = make([]string, 0, 1+len(result.Hosts)+len(result.Passives))
|
|
if result.Primary != "" {
|
|
// First in the list to speed up master discovery.
|
|
hosts = append(hosts, result.Primary)
|
|
}
|
|
hosts = append(hosts, result.Hosts...)
|
|
hosts = append(hosts, result.Passives...)
|
|
|
|
debugf("SYNC %s knows about the following peers: %#v", addr, hosts)
|
|
return info, hosts, nil
|
|
}
|
|
|
|
type syncKind bool
|
|
|
|
const (
|
|
completeSync syncKind = true
|
|
partialSync syncKind = false
|
|
)
|
|
|
|
func (cluster *mongoCluster) addServer(server *mongoServer, info *mongoServerInfo, syncKind syncKind) {
|
|
cluster.Lock()
|
|
current := cluster.servers.Search(server.ResolvedAddr)
|
|
if current == nil {
|
|
if syncKind == partialSync {
|
|
cluster.Unlock()
|
|
server.Close()
|
|
log("SYNC Discarding unknown server ", server.Addr, " due to partial sync.")
|
|
return
|
|
}
|
|
cluster.servers.Add(server)
|
|
if info.Master {
|
|
cluster.masters.Add(server)
|
|
log("SYNC Adding ", server.Addr, " to cluster as a master.")
|
|
} else {
|
|
log("SYNC Adding ", server.Addr, " to cluster as a slave.")
|
|
}
|
|
} else {
|
|
if server != current {
|
|
panic("addServer attempting to add duplicated server")
|
|
}
|
|
if server.Info().Master != info.Master {
|
|
if info.Master {
|
|
log("SYNC Server ", server.Addr, " is now a master.")
|
|
cluster.masters.Add(server)
|
|
} else {
|
|
log("SYNC Server ", server.Addr, " is now a slave.")
|
|
cluster.masters.Remove(server)
|
|
}
|
|
}
|
|
}
|
|
server.SetInfo(info)
|
|
debugf("SYNC Broadcasting availability of server %s", server.Addr)
|
|
cluster.serverSynced.Broadcast()
|
|
cluster.Unlock()
|
|
}
|
|
|
|
func (cluster *mongoCluster) getKnownAddrs() []string {
|
|
cluster.RLock()
|
|
max := len(cluster.userSeeds) + len(cluster.dynaSeeds) + cluster.servers.Len()
|
|
seen := make(map[string]bool, max)
|
|
known := make([]string, 0, max)
|
|
|
|
add := func(addr string) {
|
|
if _, found := seen[addr]; !found {
|
|
seen[addr] = true
|
|
known = append(known, addr)
|
|
}
|
|
}
|
|
|
|
for _, addr := range cluster.userSeeds {
|
|
add(addr)
|
|
}
|
|
for _, addr := range cluster.dynaSeeds {
|
|
add(addr)
|
|
}
|
|
for _, serv := range cluster.servers.Slice() {
|
|
add(serv.Addr)
|
|
}
|
|
cluster.RUnlock()
|
|
|
|
return known
|
|
}
|
|
|
|
// syncServers injects a value into the cluster.sync channel to force
|
|
// an iteration of the syncServersLoop function.
|
|
func (cluster *mongoCluster) syncServers() {
|
|
select {
|
|
case cluster.sync <- true:
|
|
default:
|
|
}
|
|
}
|
|
|
|
// How long to wait for a checkup of the cluster topology if nothing
|
|
// else kicks a synchronization before that.
|
|
const syncServersDelay = 30 * time.Second
|
|
const syncShortDelay = 500 * time.Millisecond
|
|
|
|
// syncServersLoop loops while the cluster is alive to keep its idea of
|
|
// the server topology up-to-date. It must be called just once from
|
|
// newCluster. The loop iterates once syncServersDelay has passed, or
|
|
// if somebody injects a value into the cluster.sync channel to force a
|
|
// synchronization. A loop iteration will contact all servers in
|
|
// parallel, ask them about known peers and their own role within the
|
|
// cluster, and then attempt to do the same with all the peers
|
|
// retrieved.
|
|
func (cluster *mongoCluster) syncServersLoop() {
|
|
for {
|
|
debugf("SYNC Cluster %p is starting a sync loop iteration.", cluster)
|
|
|
|
cluster.Lock()
|
|
if cluster.references == 0 {
|
|
cluster.Unlock()
|
|
break
|
|
}
|
|
cluster.references++ // Keep alive while syncing.
|
|
direct := cluster.direct
|
|
cluster.Unlock()
|
|
|
|
cluster.syncServersIteration(direct)
|
|
|
|
// We just synchronized, so consume any outstanding requests.
|
|
select {
|
|
case <-cluster.sync:
|
|
default:
|
|
}
|
|
|
|
cluster.Release()
|
|
|
|
// Hold off before allowing another sync. No point in
|
|
// burning CPU looking for down servers.
|
|
if !cluster.failFast {
|
|
time.Sleep(syncShortDelay)
|
|
}
|
|
|
|
cluster.Lock()
|
|
if cluster.references == 0 {
|
|
cluster.Unlock()
|
|
break
|
|
}
|
|
cluster.syncCount++
|
|
// Poke all waiters so they have a chance to timeout or
|
|
// restart syncing if they wish to.
|
|
cluster.serverSynced.Broadcast()
|
|
// Check if we have to restart immediately either way.
|
|
restart := !direct && cluster.masters.Empty() || cluster.servers.Empty()
|
|
cluster.Unlock()
|
|
|
|
if restart {
|
|
log("SYNC No masters found. Will synchronize again.")
|
|
time.Sleep(syncShortDelay)
|
|
continue
|
|
}
|
|
|
|
debugf("SYNC Cluster %p waiting for next requested or scheduled sync.", cluster)
|
|
|
|
// Hold off until somebody explicitly requests a synchronization
|
|
// or it's time to check for a cluster topology change again.
|
|
select {
|
|
case <-cluster.sync:
|
|
case <-time.After(syncServersDelay):
|
|
}
|
|
}
|
|
debugf("SYNC Cluster %p is stopping its sync loop.", cluster)
|
|
}
|
|
|
|
func (cluster *mongoCluster) server(addr string, tcpaddr *net.TCPAddr) *mongoServer {
|
|
cluster.RLock()
|
|
server := cluster.servers.Search(tcpaddr.String())
|
|
cluster.RUnlock()
|
|
if server != nil {
|
|
return server
|
|
}
|
|
return newServer(addr, tcpaddr, cluster.sync, cluster.dial)
|
|
}
|
|
|
|
func resolveAddr(addr string) (*net.TCPAddr, error) {
|
|
tcpaddr, err := net.ResolveTCPAddr("tcp", addr)
|
|
if err != nil {
|
|
log("SYNC Failed to resolve ", addr, ": ", err.Error())
|
|
return nil, err
|
|
}
|
|
if tcpaddr.String() != addr {
|
|
debug("SYNC Address ", addr, " resolved as ", tcpaddr.String())
|
|
}
|
|
return tcpaddr, nil
|
|
}
|
|
|
|
type pendingAdd struct {
|
|
server *mongoServer
|
|
info *mongoServerInfo
|
|
}
|
|
|
|
func (cluster *mongoCluster) syncServersIteration(direct bool) {
|
|
log("SYNC Starting full topology synchronization...")
|
|
|
|
var wg sync.WaitGroup
|
|
var m sync.Mutex
|
|
notYetAdded := make(map[string]pendingAdd)
|
|
addIfFound := make(map[string]bool)
|
|
seen := make(map[string]bool)
|
|
syncKind := partialSync
|
|
|
|
var spawnSync func(addr string, byMaster bool)
|
|
spawnSync = func(addr string, byMaster bool) {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
|
|
tcpaddr, err := resolveAddr(addr)
|
|
if err != nil {
|
|
log("SYNC Failed to start sync of ", addr, ": ", err.Error())
|
|
return
|
|
}
|
|
resolvedAddr := tcpaddr.String()
|
|
|
|
m.Lock()
|
|
if byMaster {
|
|
if pending, ok := notYetAdded[resolvedAddr]; ok {
|
|
delete(notYetAdded, resolvedAddr)
|
|
m.Unlock()
|
|
cluster.addServer(pending.server, pending.info, completeSync)
|
|
return
|
|
}
|
|
addIfFound[resolvedAddr] = true
|
|
}
|
|
if seen[resolvedAddr] {
|
|
m.Unlock()
|
|
return
|
|
}
|
|
seen[resolvedAddr] = true
|
|
m.Unlock()
|
|
|
|
server := cluster.server(addr, tcpaddr)
|
|
info, hosts, err := cluster.syncServer(server)
|
|
if err != nil {
|
|
cluster.removeServer(server)
|
|
return
|
|
}
|
|
|
|
m.Lock()
|
|
add := direct || info.Master || addIfFound[resolvedAddr]
|
|
if add {
|
|
syncKind = completeSync
|
|
} else {
|
|
notYetAdded[resolvedAddr] = pendingAdd{server, info}
|
|
}
|
|
m.Unlock()
|
|
if add {
|
|
cluster.addServer(server, info, completeSync)
|
|
}
|
|
if !direct {
|
|
for _, addr := range hosts {
|
|
spawnSync(addr, info.Master)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
knownAddrs := cluster.getKnownAddrs()
|
|
for _, addr := range knownAddrs {
|
|
spawnSync(addr, false)
|
|
}
|
|
wg.Wait()
|
|
|
|
if syncKind == completeSync {
|
|
logf("SYNC Synchronization was complete (got data from primary).")
|
|
for _, pending := range notYetAdded {
|
|
cluster.removeServer(pending.server)
|
|
}
|
|
} else {
|
|
logf("SYNC Synchronization was partial (cannot talk to primary).")
|
|
for _, pending := range notYetAdded {
|
|
cluster.addServer(pending.server, pending.info, partialSync)
|
|
}
|
|
}
|
|
|
|
cluster.Lock()
|
|
ml := cluster.masters.Len()
|
|
logf("SYNC Synchronization completed: %d master(s) and %d slave(s) alive.", ml, cluster.servers.Len()-ml)
|
|
|
|
// Update dynamic seeds, but only if we have any good servers. Otherwise,
|
|
// leave them alone for better chances of a successful sync in the future.
|
|
if syncKind == completeSync {
|
|
dynaSeeds := make([]string, cluster.servers.Len())
|
|
for i, server := range cluster.servers.Slice() {
|
|
dynaSeeds[i] = server.Addr
|
|
}
|
|
cluster.dynaSeeds = dynaSeeds
|
|
debugf("SYNC New dynamic seeds: %#v\n", dynaSeeds)
|
|
}
|
|
cluster.Unlock()
|
|
}
|
|
|
|
var socketsPerServer = 4096
|
|
|
|
// AcquireSocket returns a socket to a server in the cluster. If slaveOk is
|
|
// true, it will attempt to return a socket to a slave server. If it is
|
|
// false, the socket will necessarily be to a master server.
|
|
func (cluster *mongoCluster) AcquireSocket(slaveOk bool, syncTimeout time.Duration, socketTimeout time.Duration, serverTags []bson.D) (s *mongoSocket, err error) {
|
|
var started time.Time
|
|
var syncCount uint
|
|
warnedLimit := false
|
|
for {
|
|
cluster.RLock()
|
|
for {
|
|
ml := cluster.masters.Len()
|
|
sl := cluster.servers.Len()
|
|
debugf("Cluster has %d known masters and %d known slaves.", ml, sl-ml)
|
|
if ml > 0 || slaveOk && sl > 0 {
|
|
break
|
|
}
|
|
if started.IsZero() {
|
|
// Initialize after fast path above.
|
|
started = time.Now()
|
|
syncCount = cluster.syncCount
|
|
} else if syncTimeout != 0 && started.Before(time.Now().Add(-syncTimeout)) || cluster.failFast && cluster.syncCount != syncCount {
|
|
cluster.RUnlock()
|
|
return nil, errors.New("no reachable servers")
|
|
}
|
|
log("Waiting for servers to synchronize...")
|
|
cluster.syncServers()
|
|
|
|
// Remember: this will release and reacquire the lock.
|
|
cluster.serverSynced.Wait()
|
|
}
|
|
|
|
var server *mongoServer
|
|
if slaveOk {
|
|
server = cluster.servers.BestFit(serverTags)
|
|
} else {
|
|
server = cluster.masters.BestFit(nil)
|
|
}
|
|
cluster.RUnlock()
|
|
|
|
if server == nil {
|
|
// Must have failed the requested tags. Sleep to avoid spinning.
|
|
time.Sleep(1e8)
|
|
continue
|
|
}
|
|
|
|
s, abended, err := server.AcquireSocket(socketsPerServer, socketTimeout)
|
|
if err == errSocketLimit {
|
|
if !warnedLimit {
|
|
log("WARNING: Per-server connection limit reached.")
|
|
}
|
|
time.Sleep(1e8)
|
|
continue
|
|
}
|
|
if err != nil {
|
|
cluster.removeServer(server)
|
|
cluster.syncServers()
|
|
continue
|
|
}
|
|
if abended && !slaveOk {
|
|
var result isMasterResult
|
|
err := cluster.isMaster(s, &result)
|
|
if err != nil || !result.IsMaster {
|
|
logf("Cannot confirm server %s as master (%v)", server.Addr, err)
|
|
s.Release()
|
|
cluster.syncServers()
|
|
time.Sleep(1e8)
|
|
continue
|
|
}
|
|
}
|
|
return s, nil
|
|
}
|
|
panic("unreached")
|
|
}
|
|
|
|
func (cluster *mongoCluster) CacheIndex(cacheKey string, exists bool) {
|
|
cluster.Lock()
|
|
if cluster.cachedIndex == nil {
|
|
cluster.cachedIndex = make(map[string]bool)
|
|
}
|
|
if exists {
|
|
cluster.cachedIndex[cacheKey] = true
|
|
} else {
|
|
delete(cluster.cachedIndex, cacheKey)
|
|
}
|
|
cluster.Unlock()
|
|
}
|
|
|
|
func (cluster *mongoCluster) HasCachedIndex(cacheKey string) (result bool) {
|
|
cluster.RLock()
|
|
if cluster.cachedIndex != nil {
|
|
result = cluster.cachedIndex[cacheKey]
|
|
}
|
|
cluster.RUnlock()
|
|
return
|
|
}
|
|
|
|
func (cluster *mongoCluster) ResetIndexCache() {
|
|
cluster.Lock()
|
|
cluster.cachedIndex = make(map[string]bool)
|
|
cluster.Unlock()
|
|
}
|