Skip to content

Commit

Permalink
Fix issue where gap between resource.stop and resource.start lead to …
Browse files Browse the repository at this point in the history
…missed resource changes

- changes cover create, change and remove
- resource.stop events happen
  - we unusb
  - after socket errors (that rancher sends, like revision `too old`)
  - after resource type permissions change
- there would be a gap between resource.stop (fetch latest revision, wait 5 seconds) and resource.start
- this could lead to missed resource changes and stale info on screen

Linking a couple of pertinent changes

- forceWatch partially implemented - 14862b2#diff-42632b5ed3c30e60abade8a67748b16d45e0778091713dd71a46d4bbe9211d2c
- too old originally removed https://github.com/rancher/dashboard/pull/3743/files
  - this was implemented before the backend fixed their spam

Note - resource.stop can be forced with CATTLE_WATCH_TIMEOUT_SECONDS=300 (on v1 will resource.stop every 5 mins)
Note - Too old can be forced by editing resource.stop with
      // const revision = type === '' ? undefined : 1;
      // dispatch('watch', { ...obj, revision });
  • Loading branch information
richard-cox committed Feb 21, 2023
1 parent 6ce6632 commit e73c55c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 44 deletions.
6 changes: 4 additions & 2 deletions shell/plugins/dashboard-store/actions.js
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ export default {
dispatch('watch', {
type,
revision: out.revision,
namespace: opt.watchNamespace
namespace: opt.watchNamespace,
force: opt.forceWatch === true,
});
}

Expand Down Expand Up @@ -377,7 +378,8 @@ export default {
dispatch('watch', {
type,
selector,
revision: res.revision
revision: res.revision,
force: opt.forceWatch === true,
});
}

Expand Down
64 changes: 22 additions & 42 deletions shell/plugins/steve/subscribe.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import Socket, {
EVENT_DISCONNECT_ERROR,
NO_WATCH,
NO_SCHEMA,
REVISION_TOO_OLD
} from '@shell/utils/socket';
import { normalizeType } from '@shell/plugins/dashboard-store/normalize';
import day from 'dayjs';
Expand Down Expand Up @@ -306,15 +307,18 @@ const sharedActions = {

// If socket is in error don't try to watch.... unless we `force` it
if ( !stop && !force && !getters.canWatch(params) ) {
console.error(`Cannot Watch [${ getters.storeName }]`, JSON.stringify(params)); // eslint-disable-line no-console
console.error(`Aborting Watch Request [${ getters.storeName }] (socket probably in error)`, JSON.stringify(params)); // eslint-disable-line no-console

return;
}

if ( !stop && getters.watchStarted({
type, id, selector, namespace
}) ) {
state.debugSocket && console.debug(`Already Watching [${ getters.storeName }]`, JSON.stringify(params)); // eslint-disable-line no-console
// eslint-disable-next-line no-console
state.debugSocket && console.debug(`Already Watching [${ getters.storeName }]`, {
type, id, selector, namespace
});

return;
}
Expand Down Expand Up @@ -693,17 +697,22 @@ const defaultActions = {
} else if ( err.includes('failed to find schema') ) {
commit('setInError', { type: msg.resourceType, reason: NO_SCHEMA });
} else if ( err.includes('too old') ) {
// Set an error for (all) subs of this type. This..
// 1) blocks attempts by resource.stop to resub (as type is in error)
// 2) will be cleared when resyncWatch --> watch (with force) --> resource.start completes
commit('setInError', { type: msg.resourceType, reason: REVISION_TOO_OLD });
dispatch('resyncWatch', msg);
}
},

/**
* Steve only event
*
* Steve only seems to send out `resource.stop` messages for two reasons
* - We have requested that the resource watch should be stopped and we receive this event as confirmation
* - Steve tells us that the resource is no longer watched
*
* Steve has stopped watching this resource. This happens for a couple of reasons
* - We have requested that the resource watch should be stopped (and we receive this event as confirmation)
* - Steve tells us that the resource watch has been stopped. Possible reasons
* - The rancher <--> k8s socket closed (happens every ~30 mins on mgmt socket)
* - Permissions has changed for the subscribed resource, so rancher closes socket
*/
'ws.resource.stop'({
state, getters, commit, dispatch
Expand All @@ -718,45 +727,16 @@ const defaultActions = {

state.debugSocket && console.info(`Resource Stop [${ getters.storeName }]`, type, msg); // eslint-disable-line no-console

if (!type) {

This comment has been minimized.

Copy link
@richard-cox

richard-cox Feb 23, 2023

Author Member

msg.resourceType || msg.kind msg.type

console.error(`Resource Stop [${ getters.storeName }]. Received resource.stop with an empty resourceType, aborting`, msg); // eslint-disable-line no-console

return;
}

// If we're trying to watch this event, attempt to re-watch
if ( getters['schemaFor'](type) && getters['watchStarted'](obj) ) {
commit('setWatchStopped', obj);

// In summary, we need to re-watch but with a reliable `revision` (to avoid `too old` message kicking off a full re-fetch of all
// resources). To get a reliable `revision` go out and fetch the latest for that resource type, in theory our local cache should be
// up to date with that revision.

const revisionExisting = getters.nextResourceVersion(type, obj.id);

let revisionLatest;

if (revisionExisting) {
// Attempt to fetch the latest revision at the time the resource watch was stopped, in theory our local cache should be up to
// date with this
// Ideally we shouldn't need to fetch here and supply `0`, `-1` or `null` to start watching from the latest revision, however steve
// will send the current state of each resource via a `resource.created` event.
const opt = { limit: 1 };

opt.url = getters.urlFor(type, null, opt);
revisionLatest = dispatch('request', { opt, type } )
.then(res => res.revision)
.catch((err) => {
// For some reason we can't fetch a reasonable revision, so force a re-fetch
console.warn(`Resource error retrieving resourceVersion, forcing re-fetch`, type, ':', err); // eslint-disable-line no-console
dispatch('resyncWatch', msg);
throw err;
});
} else {
// Some v1 resource types don't have revisions (either at the collection or resource level), so we avoided making an API request
// for them
revisionLatest = Promise.resolve(null); // Null to ensure we don't go through `nextResourceVersion` again
}

setTimeout(() => {
// Delay a bit so that immediate start/error/stop causes
// only a slow infinite loop instead of a tight one.
revisionLatest.then(revision => dispatch('watch', { ...obj, revision }));
}, 5000);
dispatch('watch', obj);
}
},

Expand Down
1 change: 1 addition & 0 deletions shell/utils/socket.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export const EVENT_DISCONNECT_ERROR = 'disconnect_error';

export const NO_WATCH = 'NO_WATCH';
export const NO_SCHEMA = 'NO_SCHEMA';
export const REVISION_TOO_OLD = 'TOO_OLD';

export default class Socket extends EventTarget {
url;
Expand Down

0 comments on commit e73c55c

Please sign in to comment.