more refactoring
synced-pool.go
@@ -15,14 +15,15 @@ import (
)

// SyncedPool coordinates cart grain ownership across nodes using gRPC control plane
// and cart actor services. Legacy frame / TCP code has been removed.
// and cart actor services.
//
// Responsibilities:
// - Local grain access (delegates to GrainLocalPool)
// - Remote grain proxy management (RemoteGrainGRPC)
// - Cluster membership (AddRemote via discovery + negotiation)
// - Ownership acquisition (quorum via ConfirmOwner RPC)
// - Health/ping monitoring & remote removal
// - Ring based deterministic ownership (no runtime negotiation)
// - (Scaffolding) replication factor awareness via ring.LookupN
//
// Thread-safety: public methods that mutate internal maps lock p.mu (RWMutex).
type SyncedPool struct {
@@ -40,7 +41,12 @@ type SyncedPool struct {
	// Discovery handler for re-adding hosts after failures
	discardedHostHandler *DiscardedHostHandler

	// Metrics / instrumentation dependencies already declared globally
	// Consistent hashing ring (immutable snapshot reference)
	ringRef *RingRef

	// Configuration
	vnodesPerHost     int
	replicationFactor int // RF (>=1). Currently only primary is active; replicas are scaffolding.
}

// RemoteHostGRPC tracks a remote host's clients & health.
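The ringRef field above is described as an immutable snapshot reference, but the RingRef type itself is not part of this diff. A minimal sketch of how such a holder could look, assuming only the NewRingRef / Get / Set calls used further down (the Ring stand-in and the package name are placeholders, not from the commit):

package ringref // placeholder package, not from the diff

import "sync/atomic"

// Ring stands in for the immutable snapshot produced by NewRingBuilder().Build().
type Ring struct{}

// RingRef publishes ring snapshots atomically so lookups never block on rebuildRing.
type RingRef struct {
	ptr atomic.Pointer[Ring]
}

func NewRingRef(r *Ring) *RingRef {
	ref := &RingRef{}
	ref.ptr.Store(r)
	return ref
}

// Set swaps in a freshly built snapshot; readers holding the old one are unaffected.
func (rr *RingRef) Set(r *Ring) { rr.ptr.Store(r) }

// Get returns the current snapshot (nil until the first store).
func (rr *RingRef) Get() *Ring { return rr.ptr.Load() }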
@@ -71,8 +77,34 @@ var (
	})
	remoteLookupCount = promauto.NewCounter(prometheus.CounterOpts{
		Name: "cart_remote_lookup_total",
		Help: "The total number of remote lookups",
		Help: "The total number of remote lookups (legacy counter)",
	})

	// Ring / ownership metrics
	ringEpoch = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "cart_ring_epoch",
		Help: "Current consistent hashing ring epoch (fingerprint-based pseudo-epoch)",
	})
	ringHosts = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "cart_ring_hosts",
		Help: "Number of hosts currently in the ring",
	})
	ringVnodes = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "cart_ring_vnodes",
		Help: "Number of virtual nodes in the ring",
	})
	ringLookupLocal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "cart_ring_lookup_local_total",
		Help: "Ring ownership lookups resolved to the local host",
	})
	ringLookupRemote = promauto.NewCounter(prometheus.CounterOpts{
		Name: "cart_ring_lookup_remote_total",
		Help: "Ring ownership lookups resolved to a remote host",
	})
	ringHostShare = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "cart_ring_host_share",
		Help: "Fractional share of ring vnodes per host",
	}, []string{"host"})
)

func NewSyncedPool(local *GrainLocalPool, hostname string, discovery Discovery) (*SyncedPool, error) {
@@ -82,8 +114,12 @@ func NewSyncedPool(local *GrainLocalPool, hostname string, discovery Discovery)
		remoteHosts:          make(map[string]*RemoteHostGRPC),
		remoteIndex:          make(map[CartId]Grain),
		discardedHostHandler: NewDiscardedHostHandler(1338),
		vnodesPerHost:        64, // default smoothing factor; adjust if needed
		replicationFactor:    1,  // RF scaffold; >1 not yet activating replicas
	}
	p.discardedHostHandler.SetReconnectHandler(p.AddRemote)
	// Initialize empty ring (will be rebuilt after first AddRemote or discovery event)
	p.rebuildRing()

	if discovery != nil {
		go func() {
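As a reading aid for the new cart_ring_host_share gauge (illustration only, not from the commit): with evenly weighted hosts the value should hover around 1/len(hosts), and the default of 64 vnodes per host mainly smooths out hash-placement variance.

package main

import "fmt"

func main() {
	// Three hosts at the default 64 vnodes each => 192 vnodes total,
	// so each host ideally owns 64/192 ≈ 0.333 of the ring.
	hosts, vnodesPerHost := 3, 64
	total := hosts * vnodesPerHost
	fmt.Printf("ideal cart_ring_host_share per host: %.3f\n",
		float64(vnodesPerHost)/float64(total))
}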
@@ -175,6 +211,8 @@ func (p *SyncedPool) AddRemote(host string) {
	p.remoteHosts[host] = remote
	p.mu.Unlock()
	connectedRemotes.Set(float64(p.RemoteCount()))
	// Rebuild consistent hashing ring including this new host
	p.rebuildRing()

	log.Printf("Connected to remote host %s", host)

@@ -222,6 +260,8 @@ func (p *SyncedPool) RemoveHost(host string) {
		remote.Conn.Close()
	}
	connectedRemotes.Set(float64(p.RemoteCount()))
	// Rebuild ring after host removal
	p.rebuildRing()
}

// RemoteCount returns number of tracked remote hosts.
@@ -302,6 +342,8 @@ func (p *SyncedPool) Negotiate() {
	}
	p.mu.RUnlock()

	changed := false

	for _, r := range remotes {
		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		reply, err := r.ControlClient.Negotiate(ctx, &proto.NegotiateRequest{KnownHosts: hosts})
@@ -313,12 +355,18 @@ func (p *SyncedPool) Negotiate() {
		for _, h := range reply.Hosts {
			if !p.IsKnown(h) {
				p.AddRemote(h)
				changed = true
			}
		}
	}

	// If new hosts were discovered during negotiation, rebuild the ring once at the end.
	if changed {
		p.rebuildRing()
	}
}

// ------------------------- Grain Management ----------------------------------
// ------------------------- Grain / Ring Ownership ----------------------------

// RemoveRemoteGrain removes a remote grain mapping.
func (p *SyncedPool) RemoveRemoteGrain(id CartId) {
@@ -333,9 +381,9 @@ func (p *SyncedPool) SpawnRemoteGrain(id CartId, host string) {
		return
	}
	p.mu.Lock()
	// If local grain exists, remove it (ownership changed)
	if g, ok := p.local.grains[id]; ok && g != nil {
		delete(p.local.grains, id)
	// If local grain exists (legacy key), remove from local map (ownership moved).
	if g, ok := p.local.grains[LegacyToCartKey(id)]; ok && g != nil {
		delete(p.local.grains, LegacyToCartKey(id))
	}
	remoteHost, ok := p.remoteHosts[host]
	if !ok {
@@ -362,78 +410,134 @@ func (p *SyncedPool) GetHealthyRemotes() []*RemoteHostGRPC {
	return ret
}

// RequestOwnership attempts to become owner of a cart, requiring quorum.
// On success local grain is (or will be) created; peers spawn remote proxies.
func (p *SyncedPool) RequestOwnership(id CartId) error {
	ok := 0
	all := 0
	remotes := p.GetHealthyRemotes()
	log.Printf("RequestOwnership start id=%s host=%s healthyRemotes=%d", id, p.Hostname, len(remotes))
	for _, r := range remotes {
		log.Printf("RequestOwnership sending ConfirmOwner to host=%s id=%s", r.Host, id)
		ctx, cancel := context.WithTimeout(context.Background(), 800*time.Millisecond)
		reply, err := r.ControlClient.ConfirmOwner(ctx, &proto.OwnerChangeRequest{
			CartId:  id.String(),
			NewHost: p.Hostname,
		})
		cancel()
		all++
		if err != nil || reply == nil || !reply.Accepted {
			log.Printf("RequestOwnership negative/failed response from host=%s id=%s err=%v reply=%v", r.Host, id, err, reply)
			continue
		}
		ok++
		log.Printf("RequestOwnership accept from host=%s id=%s (ok=%d all=%d)", r.Host, id, ok, all)
// rebuildRing reconstructs the consistent hashing ring from current host set
// and updates ring-related metrics.
func (p *SyncedPool) rebuildRing() {
	p.mu.RLock()
	hosts := make([]string, 0, len(p.remoteHosts)+1)
	hosts = append(hosts, p.Hostname)
	for h := range p.remoteHosts {
		hosts = append(hosts, h)
	}
	p.mu.RUnlock()

	epochSeed := fingerprintHosts(hosts)
	builder := NewRingBuilder().
		WithHosts(hosts).
		WithEpoch(epochSeed).
		WithVnodesPerHost(p.vnodesPerHost)
	r := builder.Build()
	if p.ringRef == nil {
		p.ringRef = NewRingRef(r)
	} else {
		p.ringRef.Set(r)
	}

	// Quorum rule (majority semantics):
	// - Let N = all remotes + 1 (self)
	// - We require ok + 1 (implicit self vote) >= floor(N/2)+1
	//   => ok >= floor(N/2)
	// - Examples:
	//     N=2 (all=1): threshold=1 (need 1 remote)
	//     N=3 (all=2): threshold=1 (need 1 remote; previously required 2)
	//     N=4 (all=3): threshold=2
	//     N=5 (all=4): threshold=2
	// - This change allows faster ownership under partial remote availability in small clusters.
	log.Printf("RequestOwnership quorum evaluation id=%s host=%s ok=%d all=%d", id, p.Hostname, ok, all)
	threshold := (all + 1) / 2 // floor(N/2)
	if ok < threshold {
		p.removeLocalGrain(id)
		log.Printf("RequestOwnership failed quorum id=%s host=%s ok=%d all=%d threshold=%d", id, p.Hostname, ok, all, threshold)
		return fmt.Errorf("quorum not reached (ok=%d all=%d threshold=%d)", ok, all, threshold)
	// Metrics
	ringEpoch.Set(float64(r.Epoch))
	ringHosts.Set(float64(len(r.Hosts())))
	ringVnodes.Set(float64(len(r.Vnodes)))
	ringHostShare.Reset()
	if len(r.Vnodes) > 0 {
		perHost := make(map[string]int)
		for _, v := range r.Vnodes {
			perHost[v.Host]++
		}
		total := float64(len(r.Vnodes))
		for h, c := range perHost {
			ringHostShare.WithLabelValues(h).Set(float64(c) / total)
		}
	}
	grainSyncCount.Inc()
	log.Printf("RequestOwnership success id=%s host=%s ok=%d all=%d threshold=%d", id, p.Hostname, ok, all, threshold)
	return nil
}

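The majority rule in the quorum comment above can be checked in isolation; a small, self-contained illustration of threshold = floor(N/2), using only the arithmetic from that comment:

package main

import "fmt"

// threshold mirrors the quorum rule: with `all` healthy remotes plus the implicit
// self vote, the node needs ok >= floor((all+1)/2) remote accepts.
func threshold(all int) int { return (all + 1) / 2 }

func main() {
	for all := 1; all <= 4; all++ {
		n := all + 1 // cluster size including self
		fmt.Printf("N=%d (all=%d): need %d remote accept(s)\n", n, all, threshold(all))
	}
	// Matches the examples in the comment:
	// N=2 needs 1, N=3 needs 1, N=4 needs 2, N=5 needs 2.
}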
// ForceRingRefresh exposes a manual ring rebuild hook (primarily for tests).
func (p *SyncedPool) ForceRingRefresh() {
	p.rebuildRing()
}

// ownersFor returns the ordered list of primary + replica owners for a cart id
// (length min(replicationFactor, #hosts)). Currently only the first (primary)
// is used. This scaffolds future replication work.
func (p *SyncedPool) ownersFor(id CartId) []string {
	if p.ringRef == nil || p.replicationFactor <= 0 {
		return []string{p.Hostname}
	}
	r := p.ringRef.Get()
	if r == nil || r.Empty() {
		return []string{p.Hostname}
	}
	vnodes := r.LookupN(hashKeyString(id.String()), p.replicationFactor)
	out := make([]string, 0, len(vnodes))
	seen := make(map[string]struct{}, len(vnodes))
	for _, v := range vnodes {
		if _, ok := seen[v.Host]; ok {
			continue
		}
		seen[v.Host] = struct{}{}
		out = append(out, v.Host)
	}
	if len(out) == 0 {
		out = append(out, p.Hostname)
	}
	return out
}

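To make the dedup-by-host step in ownersFor concrete, here is a hedged, standalone mimic of that loop; the vnode values are fabricated for the example, and the real LookupN / vnode types live elsewhere in the package:

package main

import "fmt"

// vnode is a stand-in for whatever LookupN returns; only the Host field matters here.
type vnode struct{ Host string }

func main() {
	// Suppose replicationFactor = 3 and LookupN walks the ring clockwise,
	// returning vnodes that may repeat physical hosts.
	lookup := []vnode{{"node-b"}, {"node-b"}, {"node-a"}}

	out := make([]string, 0, len(lookup))
	seen := make(map[string]struct{}, len(lookup))
	for _, v := range lookup {
		if _, ok := seen[v.Host]; ok {
			continue // same physical host, skip duplicate vnode
		}
		seen[v.Host] = struct{}{}
		out = append(out, v.Host)
	}
	fmt.Println(out) // [node-b node-a]: primary first, then one distinct replica
}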
// ownerHostFor returns the primary owner host for a given id.
func (p *SyncedPool) ownerHostFor(id CartId) string {
	return p.ownersFor(id)[0]
}

// DebugOwnerHost exposes (for tests) the currently computed primary owner host.
func (p *SyncedPool) DebugOwnerHost(id CartId) string {
	return p.ownerHostFor(id)
}

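ForceRingRefresh and DebugOwnerHost are exposed primarily for tests; a hypothetical sketch of the kind of assertion they enable, assumed to live in a _test.go next to the pool (newTestPool and NewCartId are invented helpers, not part of this diff):

func TestSingleNodeOwnsEverything(t *testing.T) {
	p := newTestPool(t, "node-a") // hypothetical helper building a *SyncedPool
	p.ForceRingRefresh()          // ring now contains only the local host

	id := NewCartId() // hypothetical CartId constructor
	if got := p.DebugOwnerHost(id); got != "node-a" {
		t.Fatalf("expected local ownership, got %s", got)
	}
}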
func (p *SyncedPool) removeLocalGrain(id CartId) {
	p.mu.Lock()
	delete(p.local.grains, id)
	delete(p.local.grains, LegacyToCartKey(id))
	p.mu.Unlock()
}

// getGrain returns a local or remote grain. If absent, it synchronously attempts
// to acquire ownership before spawning a local grain to eliminate the race where
// the first mutation applies before peers have installed remote proxies.
// getGrain returns a local or remote grain. For remote ownership it performs a
// bounded readiness wait (small retries) to reduce first-call failures while
// the remote connection & proxy are initializing.
func (p *SyncedPool) getGrain(id CartId) (Grain, error) {
	owner := p.ownerHostFor(id)
	if owner == p.Hostname {
		ringLookupLocal.Inc()
		grain, err := p.local.GetGrain(id)
		if err != nil {
			return nil, err
		}
		return grain, nil
	}
	ringLookupRemote.Inc()

	// Kick off remote dial if we don't yet know the owner.
	if !p.IsKnown(owner) {
		go p.AddRemote(owner)
	}

	// Fast path existing proxy
	p.mu.RLock()
	localGrain, isLocal := p.local.grains[id]
	remoteGrain, isRemote := p.remoteIndex[id]
	if rg, ok := p.remoteIndex[id]; ok {
		p.mu.RUnlock()
		remoteLookupCount.Inc()
		return rg, nil
	}
	p.mu.RUnlock()

	if isLocal && localGrain != nil {
		return localGrain, nil
	}
	if isRemote {
		remoteLookupCount.Inc()
		return remoteGrain, nil
	}
	const (
		attempts    = 5
		sleepPerTry = 40 * time.Millisecond
	)

	// Synchronously attempt to claim ownership. If this fails (quorum not reached)
	// we re-check for a newly appeared remote proxy (another node became owner).
	if err := p.RequestOwnership(id); err != nil {
	for attempt := 0; attempt < attempts; attempt++ {
		// Try to spawn (idempotent if host already known)
		if p.IsKnown(owner) {
			p.SpawnRemoteGrain(id, owner)
		}
		// Check again
		p.mu.RLock()
		if rg, ok := p.remoteIndex[id]; ok {
			p.mu.RUnlock()
@@ -441,18 +545,20 @@ func (p *SyncedPool) getGrain(id CartId) (Grain, error) {
			return rg, nil
		}
		p.mu.RUnlock()
		return nil, err

		// Last attempt? break to return error.
		if attempt == attempts-1 {
			break
		}
		time.Sleep(sleepPerTry)
	}

	// Ownership acquired; now lazily spawn the local grain.
	grain, err := p.local.GetGrain(id)
	if err != nil {
		return nil, err
	}
	return grain, nil
	return nil, fmt.Errorf("remote owner %s not yet available for cart %s (after %d attempts)", owner, id.String(), attempts)
}

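One detail of the bounded readiness wait above, spelled out (illustration only, not from the commit): the retry loop sleeps between attempts but not after the last one, so the wait adds at most (attempts-1)*sleepPerTry on top of the ConfirmOwner round trips.

package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		attempts    = 5
		sleepPerTry = 40 * time.Millisecond
	)
	// Four sleeps of 40ms => 160ms worst-case extra wait before getGrain gives up.
	fmt.Println((attempts - 1) * sleepPerTry) // 160ms
}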
// Apply applies a single mutation to a grain (local or remote).
// Replication (RF>1) scaffolding: future enhancement will fan-out mutations
// to replica owners (best-effort) and reconcile quorum on read.
func (p *SyncedPool) Apply(id CartId, mutation interface{}) (*CartGrain, error) {
	grain, err := p.getGrain(id)
	if err != nil {
@@ -462,6 +568,7 @@ func (p *SyncedPool) Apply(id CartId, mutation interface{}) (*CartGrain, error)
}

// Get returns current state of a grain (local or remote).
// Future replication hook: Read-repair or quorum read can be added here.
func (p *SyncedPool) Get(id CartId) (*CartGrain, error) {
	grain, err := p.getGrain(id)
	if err != nil {

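For orientation, the caller-facing surface after this refactor is still NewSyncedPool plus Apply/Get; a hedged usage sketch (the local pool construction, the cart id helper and the mutation value are placeholders, not shown in this diff):

// Hedged sketch; newLocalPool, newCartId and mutation are hypothetical stand-ins.
func exampleUsage(discovery Discovery) error {
	local := newLocalPool()                                // placeholder *GrainLocalPool
	pool, err := NewSyncedPool(local, "cart-1", discovery) // discovery may be nil
	if err != nil {
		return err
	}

	id := newCartId()                                     // placeholder CartId
	if _, err := pool.Apply(id, mutation{}); err != nil { // routed to the ring owner
		return err
	}
	state, err := pool.Get(id) // served locally or via a remote grain proxy
	if err != nil {
		return err
	}
	_ = state
	return nil
}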