26 changes: 19 additions & 7 deletions server/jetstream_cluster.go
@@ -125,6 +125,7 @@ type raftGroup struct {
Storage StorageType `json:"store"`
Cluster string `json:"cluster,omitempty"`
Preferred string `json:"preferred,omitempty"`
+ ScaleUp bool `json:"scale_up,omitempty"`
// Internal
node RaftNode
}
@@ -715,7 +716,7 @@ func (js *jetStream) setupMetaGroup() error {
return err
}

- cfg := &RaftConfig{Name: defaultMetaGroupName, Store: storeDir, Log: fs}
+ cfg := &RaftConfig{Name: defaultMetaGroupName, Store: storeDir, Log: fs, Recovering: true}

// If we are soliciting leafnode connections and we are sharing a system account and do not disable it with a hint,
// we want to move to observer mode so that we extend the solicited cluster or supercluster but do not form our own.
@@ -1627,6 +1628,7 @@ func (js *jetStream) setStreamAssignmentRecovering(sa *streamAssignment) {
sa.Restore = nil
if sa.Group != nil {
sa.Group.Preferred = _EMPTY_
+ sa.Group.ScaleUp = false
}
}

@@ -1638,6 +1640,7 @@ func (js *jetStream) setConsumerAssignmentRecovering(ca *consumerAssignment) {
ca.recovering = true
if ca.Group != nil {
ca.Group.Preferred = _EMPTY_
+ ca.Group.ScaleUp = false
}
}

@@ -2000,7 +2003,7 @@ func (rg *raftGroup) setPreferred() {
}

// createRaftGroup is called to spin up this raft group if needed.
- func (js *jetStream) createRaftGroup(accName string, rg *raftGroup, storage StorageType, labels pprofLabels) (RaftNode, error) {
+ func (js *jetStream) createRaftGroup(accName string, rg *raftGroup, recovering bool, storage StorageType, labels pprofLabels) (RaftNode, error) {
// Must hold JS lock throughout, otherwise two parallel calls for the same raft group could result
// in duplicate instances for the same identifier, if the current Raft node is shutting down.
// We can release the lock temporarily while waiting for the Raft node to shut down.
@@ -2113,7 +2116,7 @@ retry:
store = ms
}

- cfg := &RaftConfig{Name: rg.Name, Store: storeDir, Log: store, Track: true}
+ cfg := &RaftConfig{Name: rg.Name, Store: storeDir, Log: store, Track: true, Recovering: recovering, ScaleUp: rg.ScaleUp}

if _, err := readPeerState(storeDir); err != nil {
s.bootstrapRaftNode(cfg, rg.Peers, true)
@@ -3670,7 +3673,7 @@ func (js *jetStream) processClusterUpdateStream(acc *Account, osa, sa *streamAss
mset.startClusterSubs()
mset.mu.Unlock()

- js.createRaftGroup(acc.GetName(), rg, storage, pprofLabels{
+ js.createRaftGroup(acc.GetName(), rg, recovering, storage, pprofLabels{
"type": "stream",
"account": mset.accName(),
"stream": mset.name(),
@@ -3781,10 +3784,11 @@ func (js *jetStream) processClusterCreateStream(acc *Account, sa *streamAssignme
alreadyRunning := rg.node != nil
storage := sa.Config.Storage
restore := sa.Restore
+ recovering := sa.recovering
js.mu.RUnlock()

// Process the raft group and make sure it's running if needed.
- _, err := js.createRaftGroup(acc.GetName(), rg, storage, pprofLabels{
+ _, err := js.createRaftGroup(acc.GetName(), rg, recovering, storage, pprofLabels{
"type": "stream",
"account": acc.Name,
"stream": sa.Config.Name,
@@ -3857,7 +3861,7 @@ func (js *jetStream) processClusterCreateStream(acc *Account, sa *streamAssignme
s.Warnf("JetStream cluster error updating stream %q for account %q: %v", sa.Config.Name, acc.Name, err)
if osa != nil {
// Process the raft group and make sure it's running if needed.
- js.createRaftGroup(acc.GetName(), osa.Group, storage, pprofLabels{
+ js.createRaftGroup(acc.GetName(), osa.Group, osa.recovering, storage, pprofLabels{
"type": "stream",
"account": mset.accName(),
"stream": mset.name(),
@@ -4353,6 +4357,7 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state
rg := ca.Group
alreadyRunning := rg != nil && rg.node != nil
accName, stream, consumer := ca.Client.serviceAccount(), ca.Stream, ca.Name
+ recovering := ca.recovering
js.mu.RUnlock()

acc, err := s.LookupAccount(accName)
@@ -4390,7 +4395,7 @@ func (js *jetStream) processClusterCreateConsumer(ca *consumerAssignment, state
storage = MemoryStorage
}
// No-op if R1.
- js.createRaftGroup(accName, rg, storage, pprofLabels{
+ js.createRaftGroup(accName, rg, recovering, storage, pprofLabels{
"type": "consumer",
"account": mset.accName(),
"stream": ca.Stream,
@@ -6494,6 +6499,8 @@ func (s *Server) jsClusteredStreamUpdateRequest(ci *ClientInfo, acc *Account, su
return
}

+ // Reset notion of scaling up, if this was done in a previous update.
+ rg.ScaleUp = false
if isReplicaChange {
isScaleUp := newCfg.Replicas > len(rg.Peers)
// We are adding new peers here.
@@ -6529,6 +6536,7 @@
// This is scale up from being a singelton, set preferred to that singelton.
rg.Preferred = rg.Peers[0]
}
+ rg.ScaleUp = true
rg.Peers = peers
} else {
// We are deleting nodes here. We want to do our best to preserve the current leader.
@@ -7645,6 +7653,9 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec

nca := ca.copyGroup()

+ // Reset notion of scaling up, if this was done in a previous update.
+ nca.Group.ScaleUp = false

rBefore := nca.Config.replicas(sa.Config)
rAfter := cfg.replicas(sa.Config)

@@ -7690,6 +7701,7 @@ func (s *Server) jsClusteredConsumerRequest(ci *ClientInfo, acc *Account, subjec
}
nca.Group.Peers = newPeerSet
nca.Group.Preferred = curLeader
+ nca.Group.ScaleUp = true
} else if rBefore > rAfter {
newPeerSet := nca.Group.Peers
// mark leader preferred and move it to end
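Worth noting: the new ScaleUp field uses omitempty, so group assignments persisted before this change decode unchanged, and the recovery paths above reset the flag so it disappears from the serialized group again. Below is a minimal standalone sketch (not part of this diff; a simplified stand-in for the raftGroup struct with made-up values) showing how the scale_up tag serializes and why resetting it keeps older assignments byte-compatible.

package main

import (
	"encoding/json"
	"fmt"
)

// Simplified stand-in for the raftGroup metadata above; only the fields
// relevant to this sketch are included, and the values are illustrative.
type raftGroup struct {
	Name      string   `json:"name"`
	Peers     []string `json:"peers"`
	Preferred string   `json:"preferred,omitempty"`
	ScaleUp   bool     `json:"scale_up,omitempty"`
}

func main() {
	rg := raftGroup{Name: "S-R3", Peers: []string{"n1", "n2", "n3"}, ScaleUp: true}

	// While a scale-up is in flight the flag is serialized...
	b, _ := json.Marshal(rg)
	fmt.Println(string(b)) // {"name":"S-R3","peers":["n1","n2","n3"],"scale_up":true}

	// ...and once it is reset (as the recovering paths do), omitempty drops
	// it, so the encoding matches assignments written before this change.
	rg.ScaleUp = false
	b, _ = json.Marshal(rg)
	fmt.Println(string(b)) // {"name":"S-R3","peers":["n1","n2","n3"]}
}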
30 changes: 25 additions & 5 deletions server/jetstream_cluster_1_test.go
@@ -7011,12 +7011,32 @@ func TestJetStreamClusterConsumerInfoAfterCreate(t *testing.T) {
nc, js := jsClientConnect(t, nl)
defer nc.Close()

- _, err := js.AddStream(&nats.StreamConfig{
+ cfg := &nats.StreamConfig{
Name: "TEST",
Subjects: []string{"foo"},
Replicas: 3,
- })
+ }
+ si, err := js.AddStream(cfg)
require_NoError(t, err)

+ // We want to ensure the consumer can be created and be applied.
+ // On the non-meta-leader server we'll pause applies, so need to make
+ // sure the consumer is not created on that server.
+ if si.Cluster.Leader == nl.Name() {
+ ml := c.leader()
+ jreq, err := json.Marshal(&JSApiLeaderStepdownRequest{Placement: &Placement{Preferred: ml.Name()}})
+ require_NoError(t, err)
+ resp, err := nc.Request(fmt.Sprintf(JSApiStreamLeaderStepDownT, "TEST"), jreq, time.Second)
+ require_NoError(t, err)
+ var sdr JSApiLeaderStepDownResponse
+ require_NoError(t, json.Unmarshal(resp.Data, &sdr))
+ }
+
+ // Scale down to ensure the consumer gets created on this server.
+ cfg.Replicas = 1
+ si, err = js.UpdateStream(cfg)
+ require_NoError(t, err)
+ require_NotEqual(t, si.Cluster.Leader, nl.Name())

// We pause applies for the server we're connected to.
// This is fine for the RAFT log and allowing the consumer to be created,
@@ -7458,7 +7478,7 @@ func TestJetStreamClusterStreamHealthCheckOnlyReportsSkew(t *testing.T) {
// Simulate stopping and restarting a new instance.
node.Stop()
node.WaitForStop()
- node, err = sjs.createRaftGroup(globalAccountName, group, FileStorage, pprofLabels{})
+ node, err = sjs.createRaftGroup(globalAccountName, group, false, FileStorage, pprofLabels{})
require_NoError(t, err)
require_NotEqual(t, node.State(), Closed)

@@ -7555,7 +7575,7 @@ func TestJetStreamClusterConsumerHealthCheckMustNotRecreate(t *testing.T) {
checkNodeIsClosed(ca)

// We create a new RAFT group, the health check should detect this skew and restart.
- _, err = sjs.createRaftGroup(globalAccountName, ca.Group, MemoryStorage, pprofLabels{})
+ _, err = sjs.createRaftGroup(globalAccountName, ca.Group, false, MemoryStorage, pprofLabels{})
require_NoError(t, err)
sjs.mu.Lock()
// We set creating to now, since previously it would delete all data but NOT restart if created within <10s.
@@ -7738,7 +7758,7 @@ func TestJetStreamClusterConsumerHealthCheckOnlyReportsSkew(t *testing.T) {
// Simulate stopping and restarting a new instance.
node.Stop()
node.WaitForStop()
- node, err = sjs.createRaftGroup(globalAccountName, group, FileStorage, pprofLabels{})
+ node, err = sjs.createRaftGroup(globalAccountName, group, false, FileStorage, pprofLabels{})
require_NoError(t, err)
require_NotEqual(t, node.State(), Closed)
