Skip to content

Commit

Permalink
[Fleet] Add per-policy inactivity timeout + use runtime fields for ag…
Browse files Browse the repository at this point in the history
…ent status (#147552)

## Summary

Part of #143455 

Previously, agents would be unenrolled by the fleet server after a given
time. Instead, they'll now be considered `Inactive`. Agents in an
`Inactive` state are hidden from the UI by default, but their API keys
remain active. This allows these agents to check in again at any time
without requesting new API keys. `inactivity_timeout` defaults to 10
minutes and can be configured on a per-policy basis.

Agents that are manually unenrolled will go into the new `Unenrolled`
status.


![image](https://user-images.githubusercontent.com/6766512/200406081-78a945bc-861a-4a5e-949c-33af59222558.png)

These changes mean that we now need to get agent policies before knowing
an agent's status. We have used a runtime field to calculate the status
at search time, which allows us to easily filter and aggregate on the
status.

### Performance
For 120 agents (20 of each main status):
- filter call with filters: 90ms
- agent status summary call: 83ms

For 12k agents (2k of each main status):
- filter call with filters: 455ms
- agent status summary call: 500ms

For 120k agents (20k of each main status):

- filter call with filters: 2.2s
- agent status summary call: 2.1s

### Manual Testing

The create agents script can be used to test this at scale, e.g. to
create 10k agents of each of the given statuses:

```bash
cd x-pack/plugins/fleet
node scripts/create_agents --count 10000 --kibana http://localhost:5601/myprefix --status offline,online,inactive,error,updating,unenrolled --inactivityTimeout 360 --delete
```

### Checklist

Delete any items that are not applicable to this PR.

- [x] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
  • Loading branch information
hop-dev authored Dec 21, 2022
1 parent efb7cdd commit a9166da
Show file tree
Hide file tree
Showing 63 changed files with 1,433 additions and 1,095 deletions.
3 changes: 3 additions & 0 deletions x-pack/plugins/fleet/common/openapi/bundled.json
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,9 @@
"inactive": {
"type": "integer"
},
"unenrolled": {
"type": "integer"
},
"offline": {
"type": "integer"
},
Expand Down
2 changes: 2 additions & 0 deletions x-pack/plugins/fleet/common/openapi/bundled.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,8 @@ paths:
type: integer
inactive:
type: integer
unenrolled:
type: integer
offline:
type: integer
online:
Expand Down
2 changes: 2 additions & 0 deletions x-pack/plugins/fleet/common/openapi/paths/agent_status.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ get:
type: integer
inactive:
type: integer
unenrolled:
type: integer
offline:
type: integer
online:
Expand Down
100 changes: 12 additions & 88 deletions x-pack/plugins/fleet/common/services/agent_status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,55 +5,8 @@
* 2.0.
*/

import { AGENT_POLLING_THRESHOLD_MS } from '../constants';
import type { Agent, AgentStatus, FleetServerAgent } from '../types';

const offlineTimeoutIntervalCount = 10; // 30s*10 = 5m timeout

export function getAgentStatus(agent: Agent | FleetServerAgent): AgentStatus {
const { last_checkin: lastCheckIn } = agent;

if (!agent.active) {
return 'inactive';
}

if (!agent.last_checkin) {
return 'enrolling';
}

const msLastCheckIn = new Date(lastCheckIn || 0).getTime();
const msSinceLastCheckIn = new Date().getTime() - msLastCheckIn;
const intervalsSinceLastCheckIn = Math.floor(msSinceLastCheckIn / AGENT_POLLING_THRESHOLD_MS);

if (intervalsSinceLastCheckIn >= offlineTimeoutIntervalCount) {
return 'offline';
}

if (agent.unenrollment_started_at && !agent.unenrolled_at) {
return 'unenrolling';
}

if (agent.last_checkin_status?.toLowerCase() === 'error') {
return 'error';
}
if (agent.last_checkin_status?.toLowerCase() === 'degraded') {
return 'degraded';
}

const policyRevision =
'policy_revision' in agent
? agent.policy_revision
: 'policy_revision_idx' in agent
? agent.policy_revision_idx
: undefined;

if (!policyRevision || (agent.upgrade_started_at && !agent.upgraded_at)) {
return 'updating';
}

return 'online';
}

export function getPreviousAgentStatusForOfflineAgents(
agent: Agent | FleetServerAgent
): AgentStatus | undefined {
Expand All @@ -80,55 +33,26 @@ export function getPreviousAgentStatusForOfflineAgents(
}
}

export function buildKueryForEnrollingAgents(path: string = ''): string {
return `not (${path}last_checkin:*)`;
}

export function buildKueryForUnenrollingAgents(path: string = ''): string {
return `${path}unenrollment_started_at:*`;
}

export function buildKueryForOnlineAgents(path: string = ''): string {
return `${path}last_checkin:* ${addExclusiveKueryFilter(
[buildKueryForOfflineAgents, buildKueryForUpdatingAgents, buildKueryForErrorAgents],
path
)}`;
}

export function buildKueryForErrorAgents(path: string = ''): string {
return `(${path}last_checkin_status:error or ${path}last_checkin_status:degraded or ${path}last_checkin_status:DEGRADED or ${path}last_checkin_status:ERROR) ${addExclusiveKueryFilter(
[buildKueryForOfflineAgents, buildKueryForUnenrollingAgents],
path
)}`;
export function buildKueryForUnenrolledAgents(): string {
return 'status:unenrolled';
}

export function buildKueryForOfflineAgents(path: string = ''): string {
return `${path}last_checkin < now-${
(offlineTimeoutIntervalCount * AGENT_POLLING_THRESHOLD_MS) / 1000
}s`;
export function buildKueryForOnlineAgents(): string {
return 'status:online';
}

export function buildKueryForUpgradingAgents(path: string = ''): string {
return `(${path}upgrade_started_at:*) and not (${path}upgraded_at:*)`;
export function buildKueryForErrorAgents(): string {
return '(status:error or status:degraded)';
}

export function buildKueryForUpdatingAgents(path: string = ''): string {
return `((${buildKueryForUpgradingAgents(path)}) or (${buildKueryForEnrollingAgents(
path
)}) or (${buildKueryForUnenrollingAgents(
path
)}) or (not ${path}policy_revision_idx:*)) ${addExclusiveKueryFilter(
[buildKueryForOfflineAgents, buildKueryForErrorAgents],
path
)}`;
export function buildKueryForOfflineAgents(): string {
return 'status:offline';
}

export function buildKueryForInactiveAgents(path: string = '') {
return `${path}active:false`;
export function buildKueryForUpdatingAgents(): string {
return '(status:updating or status:unenrolling or status:enrolling)';
}

function addExclusiveKueryFilter(kueryBuilders: Array<(path?: string) => string>, path?: string) {
return ` AND not (${kueryBuilders
.map((kueryBuilder) => `(${kueryBuilder(path)})`)
.join(' or ')})`;
export function buildKueryForInactiveAgents() {
return 'status:inactive';
}
9 changes: 8 additions & 1 deletion x-pack/plugins/fleet/common/types/models/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,17 @@ export type AgentStatus =
| 'inactive'
| 'enrolling'
| 'unenrolling'
| 'unenrolled'
| 'updating'
| 'degraded';

export type SimplifiedAgentStatus = 'healthy' | 'unhealthy' | 'updating' | 'offline' | 'inactive';
export type SimplifiedAgentStatus =
| 'healthy'
| 'unhealthy'
| 'updating'
| 'offline'
| 'inactive'
| 'unenrolled';

export type AgentActionType =
| 'UNENROLL'
Expand Down
2 changes: 2 additions & 0 deletions x-pack/plugins/fleet/common/types/rest_spec/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ export interface GetAgentStatusResponse {
offline: number;
other: number;
updating: number;
inactive: number;
unenrolled: number;
};
}

Expand Down
14 changes: 12 additions & 2 deletions x-pack/plugins/fleet/cypress/e2e/agent_list.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,17 @@ describe('View agents list', () => {
});

describe('Agent status filter', () => {
const clearFilters = () => {
cy.getBySel(FLEET_AGENT_LIST_PAGE.STATUS_FILTER).click();
cy.get('button').contains('Healthy').click();
cy.get('button').contains('Unhealthy').click();
cy.get('button').contains('Updating').click();
cy.get('button').contains('Offline').click();
cy.getBySel(FLEET_AGENT_LIST_PAGE.STATUS_FILTER).click();
};
it('should filter on healthy (16 result)', () => {
cy.visit('/app/fleet/agents');

clearFilters();
cy.getBySel(FLEET_AGENT_LIST_PAGE.STATUS_FILTER).click();

cy.get('button').contains('Healthy').click();
Expand All @@ -179,7 +187,7 @@ describe('View agents list', () => {

it('should filter on unhealthy (1 result)', () => {
cy.visit('/app/fleet/agents');

clearFilters();
cy.getBySel(FLEET_AGENT_LIST_PAGE.STATUS_FILTER).click();

cy.get('button').contains('Unhealthy').click();
Expand All @@ -190,6 +198,7 @@ describe('View agents list', () => {

it('should filter on inactive (0 result)', () => {
cy.visit('/app/fleet/agents');
clearFilters();

cy.getBySel(FLEET_AGENT_LIST_PAGE.STATUS_FILTER).click();

Expand All @@ -200,6 +209,7 @@ describe('View agents list', () => {

it('should filter on healthy and unhealthy', () => {
cy.visit('/app/fleet/agents');
clearFilters();

cy.getBySel(FLEET_AGENT_LIST_PAGE.STATUS_FILTER).click();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ const statusFilters = [
defaultMessage: 'Inactive',
}),
},
{
status: 'unenrolled',
label: i18n.translate('xpack.fleet.agentList.statusUnenrolledFilterText', {
defaultMessage: 'Unenrolled',
}),
},
];

const ClearAllTagsFilterItem = styled(EuiFilterSelectItem)`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ export const AgentStatusBadges: React.FC<{
agentStatus: { [k in SimplifiedAgentStatus]: number };
}> = memo(({ agentStatus, showInactive }) => {
const agentStatuses = useMemo(() => {
return AGENT_STATUSES.filter((status) => (showInactive ? true : status !== 'inactive'));
return AGENT_STATUSES.filter((status) =>
showInactive ? true : status !== 'inactive' && status !== 'unenrolled'
);
}, [showInactive]);

return (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,12 @@ export const AgentListPage: React.FunctionComponent<{}> = () => {
const [selectedAgentPolicies, setSelectedAgentPolicies] = useState<string[]>([]);

// Status for filtering
const [selectedStatus, setSelectedStatus] = useState<string[]>([]);
const [selectedStatus, setSelectedStatus] = useState<string[]>([
'healthy',
'unhealthy',
'updating',
'offline',
]);

const [selectedTags, setSelectedTags] = useState<string[]>([]);

Expand Down Expand Up @@ -183,6 +188,8 @@ export const AgentListPage: React.FunctionComponent<{}> = () => {
return AgentStatusKueryHelper.buildKueryForUpdatingAgents();
case 'inactive':
return AgentStatusKueryHelper.buildKueryForInactiveAgents();
case 'unenrolled':
return AgentStatusKueryHelper.buildKueryForUnenrolledAgents();
}

return undefined;
Expand All @@ -201,7 +208,7 @@ export const AgentListPage: React.FunctionComponent<{}> = () => {
}, [search, selectedAgentPolicies, selectedTags, selectedStatus]);

const showInactive = useMemo(() => {
return selectedStatus.includes('inactive');
return selectedStatus.some((status) => status === 'inactive' || status === 'unenrolled');
}, [selectedStatus]);

const [agents, setAgents] = useState<Agent[]>([]);
Expand Down Expand Up @@ -309,7 +316,8 @@ export const AgentListPage: React.FunctionComponent<{}> = () => {
unhealthy: agentsStatusResponse.data.results.error,
offline: agentsStatusResponse.data.results.offline,
updating: agentsStatusResponse.data.results.updating,
inactive: agentsResponse.data.totalInactive,
inactive: agentsStatusResponse.data.results.inactive,
unenrolled: agentsStatusResponse.data.results.unenrolled,
});

const newAllTags = agentTagsResponse.data.items;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ const Status = {
<FormattedMessage id="xpack.fleet.agentHealth.inactiveStatusText" defaultMessage="Inactive" />
</EuiBadge>
),
Unenrolled: (
<EuiBadge color="hollow">
<FormattedMessage
id="xpack.fleet.agentHealth.unenrolledStatusText"
defaultMessage="Unenrolled"
/>
</EuiBadge>
),
Unhealthy: (
<EuiBadge color="warning">
<FormattedMessage
Expand Down Expand Up @@ -64,6 +72,8 @@ function getStatusComponent(status: Agent['status']): React.ReactElement {
case 'enrolling':
case 'updating':
return Status.Updating;
case 'unenrolled':
return Status.Unenrolled;
default:
return Status.Healthy;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const colorToHexMap = {
warning: visColors[5],
danger: visColors[9],
inactive: euiLightVars.euiColorDarkShade,
lightest: euiLightVars.euiColorDisabled,
};

export const AGENT_STATUSES: SimplifiedAgentStatus[] = [
Expand All @@ -29,6 +30,7 @@ export const AGENT_STATUSES: SimplifiedAgentStatus[] = [
'updating',
'offline',
'inactive',
'unenrolled',
];

export function getColorForAgentStatus(agentStatus: SimplifiedAgentStatus): string {
Expand All @@ -43,6 +45,8 @@ export function getColorForAgentStatus(agentStatus: SimplifiedAgentStatus): stri
return colorToHexMap.warning;
case 'updating':
return colorToHexMap.primary;
case 'unenrolled':
return colorToHexMap.lightest;
default:
throw new Error(`Unsupported Agent status ${agentStatus}`);
}
Expand All @@ -62,6 +66,10 @@ export function getLabelForAgentStatus(agentStatus: SimplifiedAgentStatus): stri
return i18n.translate('xpack.fleet.agentStatus.inactiveLabel', {
defaultMessage: 'Inactive',
});
case 'unenrolled':
return i18n.translate('xpack.fleet.agentStatus.unenrolledLabel', {
defaultMessage: 'Unenrolled',
});
case 'unhealthy':
return i18n.translate('xpack.fleet.agentStatus.unhealthyLabel', {
defaultMessage: 'Unhealthy',
Expand Down
Loading

0 comments on commit a9166da

Please sign in to comment.