refactor once again
synced-pool.go
@@ -20,34 +20,26 @@ import (
//
// Responsibilities:
// - Local grain access (delegates to GrainLocalPool)
// - Remote grain proxy management (RemoteGrainGRPC)
// - Cluster membership (AddRemote via discovery + negotiation)
// - Health/ping monitoring & remote removal
// - Ring based deterministic ownership (no runtime negotiation)
// - (Scaffolding) replication factor awareness via ring.LookupN
// - (Legacy) ring-based ownership removed in first-touch model
//
// Thread-safety: public methods that mutate internal maps lock p.mu (RWMutex).
type SyncedPool struct {
    Hostname      string
    local         *GrainLocalPool
    LocalHostname string
    local         *GrainLocalPool

    // New ownership tracking (first-touch / announcement model)
    // remoteOwners maps cart id -> owning host (excluding locally owned carts which live in local.grains)
    remoteOwners map[CartId]string

    mu sync.RWMutex

    // Remote host state (gRPC only)
    remoteHosts map[string]*RemoteHostGRPC // host -> remote host

    // Remote grain proxies (by cart id)
    remoteIndex map[CartId]Grain

    // Discovery handler for re-adding hosts after failures
    discardedHostHandler *DiscardedHostHandler

    // Consistent hashing ring (immutable snapshot reference)
    ringRef *RingRef

    // Configuration
    vnodesPerHost     int
    replicationFactor int // RF (>=1). Currently only primary is active; replicas are scaffolding.
}

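The struct above mixes fields from the old ring-based design (ringRef, vnodesPerHost, replicationFactor, remoteIndex) with the new first-touch map (remoteOwners). A minimal, self-contained sketch of the first-claim-wins idea follows; it uses plain strings instead of CartId and the real pool types, and ownerTable/claim are illustrative names, not the SyncedPool API.

// Sketch only: simplified first-claim-wins ownership table, not the project's code.
package main

import (
    "fmt"
    "sync"
)

type ownerTable struct {
    mu     sync.RWMutex
    owners map[string]string // cart id -> owning host
    self   string            // this node's hostname
}

// claim returns the recorded owner, or records the local host as owner if the
// cart has never been seen before (first claim wins).
func (t *ownerTable) claim(cartID string) string {
    t.mu.RLock()
    host, ok := t.owners[cartID]
    t.mu.RUnlock()
    if ok {
        return host
    }
    t.mu.Lock()
    defer t.mu.Unlock()
    if host, ok := t.owners[cartID]; ok { // re-check under the write lock
        return host
    }
    t.owners[cartID] = t.self
    return t.self
}

func main() {
    t := &ownerTable{owners: map[string]string{"cart-42": "host-b"}, self: "host-a"}
    fmt.Println(t.claim("cart-42")) // host-b: already claimed remotely
    fmt.Println(t.claim("cart-99")) // host-a: first touch claims locally
}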
// RemoteHostGRPC tracks a remote host's clients & health.
@@ -68,61 +60,23 @@ var (
        Name: "cart_remote_negotiation_total",
        Help: "The total number of remote negotiations",
    })
    grainSyncCount = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_grain_sync_total",
        Help: "The total number of grain owner changes",
    })
    connectedRemotes = promauto.NewGauge(prometheus.GaugeOpts{
        Name: "cart_connected_remotes",
        Help: "The number of connected remotes",
    })
    remoteLookupCount = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_remote_lookup_total",
        Help: "The total number of remote lookups (legacy counter)",
    })

    // Ring / ownership metrics
    ringEpoch = promauto.NewGauge(prometheus.GaugeOpts{
        Name: "cart_ring_epoch",
        Help: "Current consistent hashing ring epoch (fingerprint-based pseudo-epoch)",
    })
    ringHosts = promauto.NewGauge(prometheus.GaugeOpts{
        Name: "cart_ring_hosts",
        Help: "Number of hosts currently in the ring",
    })
    ringVnodes = promauto.NewGauge(prometheus.GaugeOpts{
        Name: "cart_ring_vnodes",
        Help: "Number of virtual nodes in the ring",
    })
    ringLookupLocal = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_ring_lookup_local_total",
        Help: "Ring ownership lookups resolved to the local host",
    })
    ringLookupRemote = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_ring_lookup_remote_total",
        Help: "Ring ownership lookups resolved to a remote host",
    })
    ringHostShare = promauto.NewGaugeVec(prometheus.GaugeOpts{
        Name: "cart_ring_host_share",
        Help: "Fractional share of ring vnodes per host",
    }, []string{"host"})

    cartMutationsTotal = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_mutations_total",
        Help: "Total number of cart state mutations applied (local + remote routed).",
        Help: "Total number of cart state mutations applied.",
    })

    cartMutationFailuresTotal = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_mutation_failures_total",
        Help: "Total number of failed cart state mutations (local apply errors or remote routing failures).",
        Help: "Total number of failed cart state mutations.",
    })

    cartMutationLatencySeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
        Name:    "cart_mutation_latency_seconds",
        Help:    "Latency of cart mutations (successful or failed) in seconds.",
        Help:    "Latency of cart mutations in seconds.",
        Buckets: prometheus.DefBuckets,
    }, []string{"mutation"})

    cartActiveGrains = promauto.NewGauge(prometheus.GaugeOpts{
        Name: "cart_active_grains",
        Help: "Number of active (resident) local grains.",
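The var block above declares the Prometheus counters, gauges, and histograms via promauto. As a usage illustration only, here is a standalone sketch of how the mutation metrics are typically updated around an apply call; it re-declares three of the metrics so it compiles on its own (registering duplicate names in the real binary would panic), and the instrument helper is a hypothetical wrapper, not part of the diff.

// Sketch: intended update pattern for the mutation metrics (illustrative).
package metricsdemo

import (
    "time"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promauto"
)

var (
    mutationsTotal = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_mutations_total",
        Help: "Total number of cart state mutations applied.",
    })
    mutationFailuresTotal = promauto.NewCounter(prometheus.CounterOpts{
        Name: "cart_mutation_failures_total",
        Help: "Total number of failed cart state mutations.",
    })
    mutationLatencySeconds = promauto.NewHistogramVec(prometheus.HistogramOpts{
        Name:    "cart_mutation_latency_seconds",
        Help:    "Latency of cart mutations in seconds.",
        Buckets: prometheus.DefBuckets,
    }, []string{"mutation"})
)

// instrument wraps one mutation: it records latency regardless of outcome and
// bumps the success or failure counter accordingly.
func instrument(name string, apply func() error) error {
    start := time.Now()
    err := apply()
    mutationLatencySeconds.WithLabelValues(name).Observe(time.Since(start).Seconds())
    if err != nil {
        mutationFailuresTotal.Inc()
        return err
    }
    mutationsTotal.Inc()
    return nil
}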
@@ -131,13 +85,11 @@ var (

func NewSyncedPool(local *GrainLocalPool, hostname string, discovery Discovery) (*SyncedPool, error) {
    p := &SyncedPool{
        Hostname:             hostname,
        LocalHostname:        hostname,
        local:                local,
        remoteHosts:          make(map[string]*RemoteHostGRPC),
        remoteIndex:          make(map[CartId]Grain),
        remoteOwners:         make(map[CartId]string),
        discardedHostHandler: NewDiscardedHostHandler(1338),
        vnodesPerHost:        64, // default smoothing factor; adjust if needed
        replicationFactor:    1,  // RF scaffold; >1 not yet activating replicas
    }
    p.discardedHostHandler.SetReconnectHandler(p.AddRemote)
    // Initialize empty ring (will be rebuilt after first AddRemote or discovery event)
@@ -180,7 +132,7 @@ func NewSyncedPool(local *GrainLocalPool, hostname string, discovery Discovery)

// AddRemote dials a remote host and initializes grain proxies.
func (p *SyncedPool) AddRemote(host string) {
    if host == "" || host == p.Hostname {
    if host == "" || host == p.LocalHostname {
        return
    }

@@ -253,14 +205,21 @@ func (p *SyncedPool) initializeRemote(remote *RemoteHostGRPC) {
        return
    }
    count := 0
    // Record remote ownership (first-touch model) instead of spawning remote grain proxies.
    p.mu.Lock()
    for _, idStr := range reply.CartIds {
        if idStr == "" {
            continue
        }
        p.SpawnRemoteGrain(ToCartId(idStr), remote.Host)
        cid := ToCartId(idStr)
        // Only set if not already claimed (first claim wins)
        if _, exists := p.remoteOwners[cid]; !exists {
            p.remoteOwners[cid] = remote.Host
        }
        count++
    }
    log.Printf("Remote %s reported %d grains", remote.Host, count)
    p.mu.Unlock()
    log.Printf("Remote %s reported %d remote-owned carts (ownership cached)", remote.Host, count)
}

// RemoveHost removes remote host and its grains.
@@ -270,10 +229,10 @@ func (p *SyncedPool) RemoveHost(host string) {
    if exists {
        delete(p.remoteHosts, host)
    }
    // remove grains pointing to host
    for id, g := range p.remoteIndex {
        if rg, ok := g.(*RemoteGrainGRPC); ok && rg.Host == host {
            delete(p.remoteIndex, id)
    // purge remote ownership entries for this host
    for id, h := range p.remoteOwners {
        if h == host {
            delete(p.remoteOwners, id)
        }
    }
    p.mu.Unlock()
@@ -294,7 +253,7 @@ func (p *SyncedPool) RemoteCount() int {
}

func (p *SyncedPool) IsKnown(host string) bool {
    if host == p.Hostname {
    if host == p.LocalHostname {
        return true
    }
    p.mu.RLock()
@@ -354,7 +313,7 @@ func (p *SyncedPool) Negotiate() {

    p.mu.RLock()
    hosts := make([]string, 0, len(p.remoteHosts)+1)
    hosts = append(hosts, p.Hostname)
    hosts = append(hosts, p.LocalHostname)
    for h := range p.remoteHosts {
        hosts = append(hosts, h)
    }
@@ -364,8 +323,6 @@
    }
    p.mu.RUnlock()

    changed := false

    for _, r := range remotes {
        ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
        reply, err := r.ControlClient.Negotiate(ctx, &proto.NegotiateRequest{KnownHosts: hosts})
@@ -377,49 +334,20 @@ func (p *SyncedPool) Negotiate() {
        for _, h := range reply.Hosts {
            if !p.IsKnown(h) {
                p.AddRemote(h)
                changed = true
            }
        }
    }

    // If new hosts were discovered during negotiation, rebuild the ring once at the end.
    if changed {
        p.rebuildRing()
    }
    // Ring rebuild removed (first-touch ownership model no longer uses ring)
}

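The Negotiate loop above exchanges known-host lists with each remote and adds any host it has not seen yet. A small sketch of that merge step; isKnown and addRemote are stand-ins for the pool's IsKnown/AddRemote methods, and the function signature is illustrative rather than part of the diff.

// Sketch of the host-merge step performed during negotiation.
package poolsketch

// mergeHosts adds every host from a negotiation reply that is not already
// known, and reports how many were added.
func mergeHosts(replyHosts []string, isKnown func(string) bool, addRemote func(string)) int {
    added := 0
    for _, h := range replyHosts {
        if h == "" || isKnown(h) {
            continue
        }
        addRemote(h)
        added++
    }
    return added
}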
// ------------------------- Grain / Ring Ownership ----------------------------

// RemoveRemoteGrain removes a remote grain mapping.
func (p *SyncedPool) RemoveRemoteGrain(id CartId) {
    p.mu.Lock()
    delete(p.remoteIndex, id)
    p.mu.Unlock()
}
// RemoveRemoteGrain obsolete in first-touch model (no remote grain proxies retained)

// SpawnRemoteGrain creates/updates a remote grain proxy for a given host.
func (p *SyncedPool) SpawnRemoteGrain(id CartId, host string) {
    if id.String() == "" {
        return
    }
    p.mu.Lock()
    // If local grain exists (legacy key), remove from local map (ownership moved).
    if g, ok := p.local.grains[LegacyToCartKey(id)]; ok && g != nil {
        delete(p.local.grains, LegacyToCartKey(id))
    }
    remoteHost, ok := p.remoteHosts[host]
    if !ok {
        p.mu.Unlock()
        log.Printf("SpawnRemoteGrain: host %s unknown (id=%s), attempting AddRemote", host, id)
        go p.AddRemote(host)
        return
    }
    rg := NewRemoteGrainGRPC(id, host, remoteHost.CartClient)
    p.remoteIndex[id] = rg
    p.mu.Unlock()
}
// SpawnRemoteGrain removed (remote grain proxies eliminated in first-touch model)

// GetHealthyRemotes returns a copy slice of healthy remote hosts.
// GetHealthyRemotes retained (still useful for broadcasting ownership)
func (p *SyncedPool) GetHealthyRemotes() []*RemoteHostGRPC {
    p.mu.RLock()
    defer p.mu.RUnlock()
@@ -432,81 +360,22 @@ func (p *SyncedPool) GetHealthyRemotes() []*RemoteHostGRPC {
    return ret
}

// rebuildRing reconstructs the consistent hashing ring from current host set
// and updates ring-related metrics.
func (p *SyncedPool) rebuildRing() {
    p.mu.RLock()
    hosts := make([]string, 0, len(p.remoteHosts)+1)
    hosts = append(hosts, p.Hostname)
    for h := range p.remoteHosts {
        hosts = append(hosts, h)
    }
    p.mu.RUnlock()
// rebuildRing removed (ring no longer used in first-touch ownership model)
func (p *SyncedPool) rebuildRing() {}

    epochSeed := fingerprintHosts(hosts)
    builder := NewRingBuilder().
        WithHosts(hosts).
        WithEpoch(epochSeed).
        WithVnodesPerHost(p.vnodesPerHost)
    r := builder.Build()
    if p.ringRef == nil {
        p.ringRef = NewRingRef(r)
    } else {
        p.ringRef.Set(r)
    }
// (All ring construction & metrics removed)

    // Metrics
    ringEpoch.Set(float64(r.Epoch))
    ringHosts.Set(float64(len(r.Hosts())))
    ringVnodes.Set(float64(len(r.Vnodes)))
    ringHostShare.Reset()
    if len(r.Vnodes) > 0 {
        perHost := make(map[string]int)
        for _, v := range r.Vnodes {
            perHost[v.Host]++
        }
        total := float64(len(r.Vnodes))
        for h, c := range perHost {
            ringHostShare.WithLabelValues(h).Set(float64(c) / total)
        }
    }
}
// ForceRingRefresh kept as no-op for backward compatibility.
func (p *SyncedPool) ForceRingRefresh() {}

// ForceRingRefresh exposes a manual ring rebuild hook (primarily for tests).
func (p *SyncedPool) ForceRingRefresh() {
    p.rebuildRing()
}

// ownersFor returns the ordered list of primary + replica owners for a cart id
// (length min(replicationFactor, #hosts)). Currently only the first (primary)
// is used. This scaffolds future replication work.
// ownersFor removed (ring-based ownership deprecated)
func (p *SyncedPool) ownersFor(id CartId) []string {
    if p.ringRef == nil || p.replicationFactor <= 0 {
        return []string{p.Hostname}
    }
    r := p.ringRef.Get()
    if r == nil || r.Empty() {
        return []string{p.Hostname}
    }
    vnodes := r.LookupN(hashKeyString(id.String()), p.replicationFactor)
    out := make([]string, 0, len(vnodes))
    seen := make(map[string]struct{}, len(vnodes))
    for _, v := range vnodes {
        if _, ok := seen[v.Host]; ok {
            continue
        }
        seen[v.Host] = struct{}{}
        out = append(out, v.Host)
    }
    if len(out) == 0 {
        out = append(out, p.Hostname)
    }
    return out
    return []string{p.LocalHostname}
}

// ownerHostFor returns the primary owner host for a given id.
// ownerHostFor retained as wrapper to satisfy existing calls (always local)
func (p *SyncedPool) ownerHostFor(id CartId) string {
    return p.ownersFor(id)[0]
    return p.LocalHostname
}

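For readers unfamiliar with the scheme this commit deletes: the removed ownersFor/LookupN path picked a primary plus replica candidates by walking a virtual-node ring clockwise from the key's hash. The sketch below shows that idea with simplified types and FNV hashing; it is not the project's RingBuilder/RingRef API, just an illustration of what the deleted code computed.

// Sketch of consistent hashing with virtual nodes and an N-owner lookup.
package ringsketch

import (
    "fmt"
    "hash/fnv"
    "sort"
)

type vnode struct {
    hash uint64
    host string
}

type ring struct{ vnodes []vnode }

func hashKey(s string) uint64 {
    h := fnv.New64a()
    h.Write([]byte(s))
    return h.Sum64()
}

// build places vnodesPerHost virtual nodes per host on the ring, sorted by hash.
func build(hosts []string, vnodesPerHost int) *ring {
    r := &ring{}
    for _, h := range hosts {
        for i := 0; i < vnodesPerHost; i++ {
            r.vnodes = append(r.vnodes, vnode{hash: hashKey(fmt.Sprintf("%s#%d", h, i)), host: h})
        }
    }
    sort.Slice(r.vnodes, func(i, j int) bool { return r.vnodes[i].hash < r.vnodes[j].hash })
    return r
}

// lookupN walks clockwise from the key's position and returns up to n distinct
// hosts: the primary first, then replica candidates.
func (r *ring) lookupN(key string, n int) []string {
    if len(r.vnodes) == 0 || n <= 0 {
        return nil
    }
    kh := hashKey(key)
    start := sort.Search(len(r.vnodes), func(i int) bool { return r.vnodes[i].hash >= kh })
    out := make([]string, 0, n)
    seen := make(map[string]struct{}, n)
    for i := 0; i < len(r.vnodes) && len(out) < n; i++ {
        v := r.vnodes[(start+i)%len(r.vnodes)]
        if _, ok := seen[v.host]; ok {
            continue
        }
        seen[v.host] = struct{}{}
        out = append(out, v.host)
    }
    return out
}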
// DebugOwnerHost exposes (for tests) the currently computed primary owner host.
@@ -520,62 +389,139 @@ func (p *SyncedPool) removeLocalGrain(id CartId) {
    p.mu.Unlock()
}

// getGrain returns a local or remote grain. For remote ownership it performs a
// bounded readiness wait (small retries) to reduce first-call failures while
// the remote connection & proxy are initializing.
func (p *SyncedPool) getGrain(id CartId) (Grain, error) {
    owner := p.ownerHostFor(id)
    if owner == p.Hostname {
        ringLookupLocal.Inc()
        grain, err := p.local.GetGrain(id)
        if err != nil {
            return nil, err
        }
        return grain, nil
    }
    ringLookupRemote.Inc()
// ------------------------- First-Touch Ownership Resolution ------------------

    // Kick off remote dial if we don't yet know the owner.
    if !p.IsKnown(owner) {
        go p.AddRemote(owner)
// ErrNotOwner is returned when an operation is attempted on a cart that is
// owned by a different host (according to first-touch ownership mapping).
var ErrNotOwner = fmt.Errorf("not owner")

// resolveOwnerFirstTouch implements the new semantics:
//  1. If local grain exists -> local host owns it.
//  2. Else if remoteOwners has an entry -> return that host.
//  3. Else: claim locally (spawn), insert into remoteOwners map locally for
//     idempotency, and asynchronously announce ownership to all remotes.
//
// NOTE: This does NOT (yet) reconcile conflicting announcements; first claim
// wins. Later improvements can add tie-break via timestamp or host ordering.
func (p *SyncedPool) resolveOwnerFirstTouch(id CartId) (string, error) {
    // Fast local existence check
    p.local.mu.RLock()
    _, existsLocal := p.local.grains[LegacyToCartKey(id)]
    p.local.mu.RUnlock()
    if existsLocal {
        return p.LocalHostname, nil
    }

    // Fast path existing proxy
    // Remote ownership map lookup
    p.mu.RLock()
    if rg, ok := p.remoteIndex[id]; ok {
        p.mu.RUnlock()
        remoteLookupCount.Inc()
        return rg, nil
    remoteHost, foundRemote := p.remoteOwners[id]
    p.mu.RUnlock()
    if foundRemote && remoteHost != "" {
        return remoteHost, nil
    }

    // Claim: spawn locally
    _, err := p.local.GetGrain(id)
    if err != nil {
        return "", err
    }

    // Record (defensive) in remoteOwners pointing to self (not strictly needed
    // for local queries, but keeps a single lookup structure).
    p.mu.Lock()
    if _, stillMissing := p.remoteOwners[id]; !stillMissing {
        // Another goroutine inserted meanwhile; keep theirs (first claim wins).
    } else {
        p.remoteOwners[id] = p.LocalHostname
    }
    p.mu.Unlock()

    // Announce asynchronously
    go p.broadcastOwnership([]CartId{id})
    return p.LocalHostname, nil
}

// broadcastOwnership sends an AnnounceOwnership RPC to all healthy remotes.
// Best-effort: failures are logged and ignored.
func (p *SyncedPool) broadcastOwnership(ids []CartId) {
    if len(ids) == 0 {
        return
    }
    // Prepare payload (convert to string slice)
    payload := make([]string, 0, len(ids))
    for _, id := range ids {
        if id.String() != "" {
            payload = append(payload, id.String())
        }
    }
    if len(payload) == 0 {
        return
    }

    p.mu.RLock()
    remotes := make([]*RemoteHostGRPC, 0, len(p.remoteHosts))
    for _, r := range p.remoteHosts {
        if r.IsHealthy() {
            remotes = append(remotes, r)
        }
    }
    p.mu.RUnlock()

    const (
        attempts    = 5
        sleepPerTry = 40 * time.Millisecond
    )
    for _, r := range remotes {
        go func(rh *RemoteHostGRPC) {
            // AnnounceOwnership RPC not yet available (proto regeneration pending); no-op broadcast for now.
            // Intended announcement: host=p.LocalHostname ids=payload
            _ = rh
        }(r)
    }
}

    for attempt := 0; attempt < attempts; attempt++ {
        // Try to spawn (idempotent if host already known)
        if p.IsKnown(owner) {
            p.SpawnRemoteGrain(id, owner)
// AdoptRemoteOwnership processes an incoming ownership announcement for cart ids.
func (p *SyncedPool) AdoptRemoteOwnership(host string, ids []string) {
    if host == "" || host == p.LocalHostname {
        return
    }
    p.mu.Lock()
    defer p.mu.Unlock()
    for _, s := range ids {
        if s == "" {
            continue
        }
        // Check again
        p.mu.RLock()
        if rg, ok := p.remoteIndex[id]; ok {
            p.mu.RUnlock()
            remoteLookupCount.Inc()
            return rg, nil
        id := ToCartId(s)
        // Do not overwrite if already claimed by another host (first wins).
        if existing, ok := p.remoteOwners[id]; ok && existing != host {
            continue
        }
        p.mu.RUnlock()
        // Skip if we own locally (local wins for our own process)
        p.local.mu.RLock()
        _, localHas := p.local.grains[LegacyToCartKey(id)]
        p.local.mu.RUnlock()
        if localHas {
            continue
        }
        p.remoteOwners[id] = host
    }
}

        // Last attempt? break to return error.
        if attempt == attempts-1 {
            break
        }
        time.Sleep(sleepPerTry)
// getGrain returns a local grain if this host is (or becomes) the owner under
// the first-touch model. If another host owns the cart, ErrNotOwner is returned.
// Remote grain proxy logic and ring-based spawning have been removed.
func (p *SyncedPool) getGrain(id CartId) (Grain, error) {
    owner, err := p.resolveOwnerFirstTouch(id)
    if err != nil {
        return nil, err
    }
    if owner != p.LocalHostname {
        // Another host owns it; signal caller to proxy / forward.
        return nil, ErrNotOwner
    }

        return nil, fmt.Errorf("remote owner %s not yet available for cart %s (after %d attempts)", owner, id.String(), attempts)
    // Owner is local (either existing or just claimed), fetch/create grain.
    grain, err := p.local.GetGrain(id)
    if err != nil {
        return nil, err
    }
    return grain, nil
}

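The new getGrain signals non-ownership instead of proxying to a remote grain. As a sketch of how a layer above the pool might react to ErrNotOwner by forwarding to the owning host: the narrowed pool interface, the forwardToHost helper, and the error wiring below are illustrative assumptions, not the project's API.

// Sketch: caller-side handling of ErrNotOwner by forwarding to the owner.
package callersketch

import (
    "errors"
    "fmt"
)

// ErrNotOwner mirrors the sentinel defined in the diff.
var ErrNotOwner = errors.New("not owner")

// pool is a narrowed, hypothetical view of SyncedPool for this sketch.
type pool interface {
    Apply(cartID string, mutation interface{}) (interface{}, error)
    OwnerHost(cartID string) string
}

// applyOrForward applies locally and, on ErrNotOwner, forwards the mutation to
// the host the pool reports as owner. forwardToHost stands in for a gRPC call.
func applyOrForward(p pool, cartID string, mutation interface{},
    forwardToHost func(host, cartID string, mutation interface{}) (interface{}, error)) (interface{}, error) {
    result, err := p.Apply(cartID, mutation)
    if err == nil {
        return result, nil
    }
    if !errors.Is(err, ErrNotOwner) {
        return nil, err
    }
    owner := p.OwnerHost(cartID)
    if owner == "" {
        return nil, fmt.Errorf("cart %s: owner unknown", cartID)
    }
    return forwardToHost(owner, cartID, mutation)
}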
// Apply applies a single mutation to a grain (local or remote).
@@ -583,9 +529,28 @@ func (p *SyncedPool) getGrain(id CartId) (Grain, error) {
// to replica owners (best-effort) and reconcile quorum on read.
func (p *SyncedPool) Apply(id CartId, mutation interface{}) (*CartGrain, error) {
    grain, err := p.getGrain(id)
    if err != nil {
    if err == ErrNotOwner {
        // Remote owner reported but either unreachable or failed earlier in stack.
        // Takeover strategy: remove remote mapping (first-touch override) and claim locally.
        p.mu.Lock()
        delete(p.remoteOwners, id)
        p.mu.Unlock()
        if owner, terr := p.resolveOwnerFirstTouch(id); terr != nil {
            return nil, terr
        } else if owner == p.LocalHostname {
            // Fetch (now-local) grain
            grain, err = p.local.GetGrain(id)
            if err != nil {
                return nil, err
            }
        } else {
            // Another host reclaimed before us; treat as not owner.
            return nil, ErrNotOwner
        }
    } else if err != nil {
        return nil, err
    }

    start := time.Now()
    result, applyErr := grain.Apply(mutation, false)

@@ -605,7 +570,7 @@ func (p *SyncedPool) Apply(id CartId, mutation interface{}) (*CartGrain, error)

    if applyErr == nil && result != nil {
        cartMutationsTotal.Inc()
        if p.ownerHostFor(id) == p.Hostname {
        if p.ownerHostFor(id) == p.LocalHostname {
            // Update active grains gauge only for local ownership
            cartActiveGrains.Set(float64(p.local.DebugGrainCount()))
        }
@@ -619,7 +584,22 @@ func (p *SyncedPool) Apply(id CartId, mutation interface{}) (*CartGrain, error)
// Future replication hook: Read-repair or quorum read can be added here.
func (p *SyncedPool) Get(id CartId) (*CartGrain, error) {
    grain, err := p.getGrain(id)
    if err != nil {
    if err == ErrNotOwner {
        // Attempt takeover on read as well (e.g. owner dead).
        p.mu.Lock()
        delete(p.remoteOwners, id)
        p.mu.Unlock()
        if owner, terr := p.resolveOwnerFirstTouch(id); terr != nil {
            return nil, terr
        } else if owner == p.LocalHostname {
            grain, err = p.local.GetGrain(id)
            if err != nil {
                return nil, err
            }
        } else {
            return nil, ErrNotOwner
        }
    } else if err != nil {
        return nil, err
    }
    return grain.GetCurrentState()
@@ -637,7 +617,7 @@ func (p *SyncedPool) Close() {
    for _, r := range remotes {
        go func(rh *RemoteHostGRPC) {
            ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
            _, err := rh.ControlClient.Closing(ctx, &proto.ClosingNotice{Host: p.Hostname})
            _, err := rh.ControlClient.Closing(ctx, &proto.ClosingNotice{Host: p.LocalHostname})
            cancel()
            if err != nil {
                log.Printf("Close notify to %s failed: %v", rh.Host, err)
@@ -645,3 +625,13 @@ func (p *SyncedPool) Close() {
        }(r)
    }
}

// Hostname implements the GrainPool interface, returning this node's hostname.
func (p *SyncedPool) Hostname() string {
    return p.LocalHostname
}

// OwnerHost returns the primary owning host for a given cart id (ring lookup).
func (p *SyncedPool) OwnerHost(id CartId) string {
    return p.ownerHostFor(id)
}

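Close (in the two hunks above) notifies each remote in its own goroutine with a one-second per-call timeout. A generic sketch of that fan-out pattern follows; notify is a stand-in for the ControlClient.Closing RPC, and the WaitGroup is only there to make the sketch self-contained, not something the diff shows.

// Sketch: best-effort fan-out notification with a per-call timeout.
package closesketch

import (
    "context"
    "log"
    "sync"
    "time"
)

// notifyAll sends a shutdown notice to every host, each call in its own
// goroutine bounded by a short timeout; failures are logged and ignored.
func notifyAll(hosts []string, notify func(ctx context.Context, host string) error) {
    var wg sync.WaitGroup
    for _, h := range hosts {
        wg.Add(1)
        go func(host string) {
            defer wg.Done()
            ctx, cancel := context.WithTimeout(context.Background(), time.Second)
            defer cancel()
            if err := notify(ctx, host); err != nil {
                log.Printf("close notify to %s failed: %v", host, err)
            }
        }(h)
    }
    wg.Wait()
}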