Skip to content

Commit

Permalink
[Rule Migration] Adding CIM to ECS mapping and ESQL validation (#202331)
Browse files Browse the repository at this point in the history
## Summary

This PR adds the initial context to map CIM fields to ECS and two new
nodes validation and a node to handle esql validation issues, fixing
itself.

This is how the graph looks compared to its old one:
<img width="646" alt="image"
src="https://github.com/user-attachments/assets/253e449c-ac6f-4913-8da4-eb36f4e7b982">


Validation always runs last, and if validation returns any errors it
will run the appropriate node depending on what validation failed. Once
it is resolved it will validate again and then END when its successful.

Currently 5 error iterations is max, which is just an arbitrary number.
The default Langgraph configuration is 25 nodes executed in total for a
specific graph before it errors with a recursion limit (main and sub
graphs are not combined in that count).

A few things are included in this PR:

- Moved ESQL KB caller to util(any better place?), as it is now used in
multiple nodes.
- New Validation node, where any sort of validation takes place, usually
the last step before ending the graph (on success).
- New ESQL Error node, to resolve any ESQL validation errors and trigger
a re-validation.
- Fix a small bug in the main graph on the conditional edges, added a
map for the allowed return values.
  • Loading branch information
P1llus authored Dec 3, 2024
1 parent f0fbefa commit c1d976b
Show file tree
Hide file tree
Showing 17 changed files with 461 additions and 24 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,5 @@ const matchedPrebuiltRuleConditional = (state: MigrateRuleState) => {
if (state.elastic_rule?.prebuilt_rule_id) {
return END;
}
return 'translation';
return 'translationSubGraph';
};
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@
*/

import { END, START, StateGraph } from '@langchain/langgraph';
import { isEmpty } from 'lodash/fp';
import { SiemMigrationRuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
import { getFixQueryErrorsNode } from './nodes/fix_query_errors';
import { getProcessQueryNode } from './nodes/process_query';
import { getRetrieveIntegrationsNode } from './nodes/retrieve_integrations';
import { getTranslateRuleNode } from './nodes/translate_rule';
import { getValidationNode } from './nodes/validation';
import { translateRuleState } from './state';
import type { TranslateRuleGraphParams } from './types';
import type { TranslateRuleGraphParams, TranslateRuleState } from './types';

// How many times we will try to self-heal when validation fails, to prevent infinite graph recursions
const MAX_VALIDATION_ITERATIONS = 3;

export function getTranslateRuleGraph({
model,
Expand All @@ -35,19 +42,37 @@ export function getTranslateRuleGraph({
model,
integrationRetriever,
});
const validationNode = getValidationNode({ logger });
const fixQueryErrorsNode = getFixQueryErrorsNode({ inferenceClient, connectorId, logger });

const translateRuleGraph = new StateGraph(translateRuleState)
// Nodes
.addNode('processQuery', processQueryNode)
.addNode('retrieveIntegrations', retrieveIntegrationsNode)
.addNode('translateRule', translateRuleNode)
.addNode('validation', validationNode)
.addNode('fixQueryErrors', fixQueryErrorsNode)
// Edges
.addEdge(START, 'processQuery')
.addEdge('processQuery', 'retrieveIntegrations')
.addEdge('retrieveIntegrations', 'translateRule')
.addEdge('translateRule', END);
.addEdge('translateRule', 'validation')
.addEdge('fixQueryErrors', 'validation')
.addConditionalEdges('validation', validationRouter);

const graph = translateRuleGraph.compile();
graph.name = 'Translate Rule Graph';
return graph;
}

const validationRouter = (state: TranslateRuleState) => {
if (
state.validation_errors.iterations <= MAX_VALIDATION_ITERATIONS &&
state.translation_result === SiemMigrationRuleTranslationResult.FULL
) {
if (!isEmpty(state.validation_errors?.esql_errors)) {
return 'fixQueryErrors';
}
}
return END;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { Logger } from '@kbn/core/server';
import type { InferenceClient } from '@kbn/inference-plugin/server';
import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
import type { GraphNode } from '../../types';
import { RESOLVE_ESQL_ERRORS_TEMPLATE } from './prompts';

interface GetFixQueryErrorsNodeParams {
inferenceClient: InferenceClient;
connectorId: string;
logger: Logger;
}

export const getFixQueryErrorsNode = ({
inferenceClient,
connectorId,
logger,
}: GetFixQueryErrorsNodeParams): GraphNode => {
const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
return async (state) => {
const rule = state.elastic_rule;
const prompt = await RESOLVE_ESQL_ERRORS_TEMPLATE.format({
esql_errors: state.validation_errors.esql_errors,
esql_query: rule.query,
});
const response = await esqlKnowledgeBaseCaller(prompt);

const esqlQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';
rule.query = esqlQuery;
return { elastic_rule: rule };
};
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { getFixQueryErrorsNode } from './fix_query_errors';
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { ChatPromptTemplate } from '@langchain/core/prompts';

export const RESOLVE_ESQL_ERRORS_TEMPLATE =
ChatPromptTemplate.fromTemplate(`You are a helpful cybersecurity (SIEM) expert agent. Your task is to resolve the errors in the Elasticsearch Query Language (ES|QL) query provided by the user.
Below is the relevant errors related to the ES|SQL query:
<context>
<esql_errors>
{esql_errors}
</esql_errors>
<esql_query>
{esql_query}
</esql_query>
</context>
<guidelines>
- You will be provided with the currentl ES|QL query and its related errors.
- Try to resolve the errors in the ES|QL query as best as you can to make it work.
- You must respond only with the modified query inside a \`\`\`esql code block, nothing else similar to the example response below.
</guidelines>
<example_response>
A: Please find the modified ES|QL query below:
\`\`\`esql
FROM logs-endpoint.events.process-*
| WHERE process.executable LIKE \"%chown root%\"
| STATS count = COUNT(*), firstTime = MIN(@timestamp), lastTime = MAX(@timestamp) BY process.executable,
process.command_line,
host.name
| EVAL firstTime = TO_DATETIME(firstTime), lastTime = TO_DATETIME(lastTime)
\`\`\`
</example_response>
`);
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ export const getProcessQueryNode = ({
const replaceQueryResourcePrompt =
REPLACE_QUERY_RESOURCE_PROMPT.pipe(model).pipe(replaceQueryParser);
const resourceContext = getResourcesContext(resources);
query = await replaceQueryResourcePrompt.invoke({
const response = await replaceQueryResourcePrompt.invoke({
query: state.original_rule.query,
macros: resourceContext.macros,
lookup_tables: resourceContext.lists,
});
const splQuery = response.match(/```spl\n([\s\S]*?)\n```/)?.[1] ?? '';
if (splQuery) {
query = splQuery;
}
}
return { inline_query: query };
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,14 @@ Divide the query up into separate section and go through each section one at a t
<example_response>
A: Please find the modified SPL query below:
\`\`\`json
{{"match": "Linux User Account Creation"}}
\`\`\`spl
sourcetype="linux:audit" \`linux_auditd_normalized_proctitle_process\`
| rename host as dest
| where LIKE (process_exec, "%chown root%")
| stats count min(_time) as firstTime max(_time) as lastTime by process_exec proctitle normalized_proctitle_delimiter dest
| convert timeformat="%Y-%m-%dT%H:%M:%S" ctime(firstTime)
| convert timeformat="%Y-%m-%dT%H:%M:%S" ctime(lastTime)
| search *
\`\`\`
</example_response>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export const SIEM_RULE_MIGRATION_CIM_ECS_MAP = `
datamodel,object,source_field,ecs_field,data_type
Application_State,All_Application_State,dest,service.node.name,string
Application_State,All_Application_State,process,process.title,string
Application_State,All_Application_State,user,user.name,string
Application_State,Ports,dest_port,destination.port,number
Application_State,Ports,transport,network.transport,string
Application_State,Ports,transport_dest_port,destination.port,string
Application_State,Services,service,service.name,string
Application_State,Services,service_id,service.id,string
Application_State,Services,status,service.state,string
Authentication,Authentication,action,event.action,string
Authentication,Authentication,app,process.name,string
Authentication,Authentication,dest,host.name,string
Authentication,Authentication,duration,event.duration,number
Authentication,Authentication,signature,event.code,string
Authentication,Authentication,signature_id,event.reason,string
Authentication,Authentication,src,source.address,string
Authentication,Authentication,src_nt_domain,source.domain,string
Authentication,Authentication,user,user.name,string
Certificates,All_Certificates,dest_port,destination.port,number
Certificates,All_Certificates,duration,event.duration,number
Certificates,All_Certificates,src,source.address,string
Certificates,All_Certificates,src_port,source.port,number
Certificates,All_Certificates,transport,network.protocol,string
Certificates,SSL,ssl_end_time,tls.server.not_after,time
Certificates,SSL,ssl_hash,tls.server.hash,string
Certificates,SSL,ssl_issuer_common_name,tls.server.issuer,string
Certificates,SSL,ssl_issuer_locality,x509.issuer.locality,string
Certificates,SSL,ssl_issuer_organization,x509.issuer.organization,string
Certificates,SSL,ssl_issuer_state,x509.issuer.state_or_province,string
Certificates,SSL,ssl_issuer_unit,x509.issuer.organizational_unit,string
Certificates,SSL,ssl_publickey_algorithm,x509.public_key_algorithm,string
Certificates,SSL,ssl_serial,x509.serial_number,string
Certificates,SSL,ssl_signature_algorithm,x509.signature_algorithm,string
Certificates,SSL,ssl_start_time,x509.not_before,time
Certificates,SSL,ssl_subject,x509.subject.distinguished_name,string
Certificates,SSL,ssl_subject_common_name,x509.subject.common_name,string
Certificates,SSL,ssl_subject_locality,x509.subject.locality,string
Certificates,SSL,ssl_subject_organization,x509.subject.organization,string
Certificates,SSL,ssl_subject_state,x509.subject.state_or_province,string
Certificates,SSL,ssl_subject_unit,x509.subject.organizational_unit,string
Certificates,SSL,ssl_version,tls.version,string
Change,All_Changes,action,event.action,string
Change,Account_Management,dest_nt_domain,destination.domain,string
Change,Account_Management,src_nt_domain,source.domain,string
Change,Account_Management,src_user,source.user,string
Intrusion_Detection,IDS_Attacks,action,event.action,string
Intrusion_Detection,IDS_Attacks,dest,destination.address,string
Intrusion_Detection,IDS_Attacks,dest_port,destination.port,number
Intrusion_Detection,IDS_Attacks,dvc,observer.hostname,string
Intrusion_Detection,IDS_Attacks,severity,event.severity,string
Intrusion_Detection,IDS_Attacks,src,source.ip,string
Intrusion_Detection,IDS_Attacks,user,source.user,string
JVM,OS,os,host.os.name,string
JVM,OS,os_architecture,host.architecture,string
JVM,OS,os_version,host.os.version,string
Malware,Malware_Attacks,action,event.action,string
Malware,Malware_Attacks,date,event.created,string
Malware,Malware_Attacks,dest,host.hostname,string
Malware,Malware_Attacks,file_hash,file.hash.*,string
Malware,Malware_Attacks,file_name,file.name,string
Malware,Malware_Attacks,file_path,file.path,string
Malware,Malware_Attacks,Sender,source.user.email,string
Malware,Malware_Attacks,src,source.ip,string
Malware,Malware_Attacks,user,related.user,string
Malware,Malware_Attacks,url,rule.reference,string
Network_Resolution,DNS,answer,dns.answers,string
Network_Resolution,DNS,dest,destination.address,string
Network_Resolution,DNS,dest_port,destination.port,number
Network_Resolution,DNS,duration,event.duration,number
Network_Resolution,DNS,message_type,dns.type,string
Network_Resolution,DNS,name,dns.question.name,string
Network_Resolution,DNS,query,dns.question.name,string
Network_Resolution,DNS,query_type,dns.op_code,string
Network_Resolution,DNS,record_type,dns.question.type,string
Network_Resolution,DNS,reply_code,dns.response_code,string
Network_Resolution,DNS,reply_code_id,dns.id,number
Network_Resolution,DNS,response_time,event.duration,number
Network_Resolution,DNS,src,source.address,string
Network_Resolution,DNS,src_port,source.port,number
Network_Resolution,DNS,transaction_id,dns.id,number
Network_Resolution,DNS,transport,network.transport,string
Network_Resolution,DNS,ttl,dns.answers.ttl,number
Network_Sessions,All_Sessions,action,event.action,string
Network_Sessions,All_Sessions,dest_ip,destination.ip,string
Network_Sessions,All_Sessions,dest_mac,destination.mac,string
Network_Sessions,All_Sessions,duration,event.duration,number
Network_Sessions,All_Sessions,src_dns,source.registered_domain,string
Network_Sessions,All_Sessions,src_ip,source.ip,string
Network_Sessions,All_Sessions,src_mac,source.mac,string
Network_Sessions,All_Sessions,user,user.name,string
Network_Traffic,All_Traffic,action,event.action,string
Network_Traffic,All_Traffic,app,network.protocol,string
Network_Traffic,All_Traffic,bytes,network.bytes,number
Network_Traffic,All_Traffic,dest,destination.ip,string
Network_Traffic,All_Traffic,dest_ip,destination.ip,string
Network_Traffic,All_Traffic,dest_mac,destination.mac,string
Network_Traffic,All_Traffic,dest_port,destination.port,number
Network_Traffic,All_Traffic,dest_translated_ip,destination.nat.ip,string
Network_Traffic,All_Traffic,dest_translated_port,destination.nat.port,number
Network_Traffic,All_Traffic,direction,network.direction,string
Network_Traffic,All_Traffic,duration,event.duration,number
Network_Traffic,All_Traffic,dvc,observer.name,string
Network_Traffic,All_Traffic,dvc_ip,observer.ip,string
Network_Traffic,All_Traffic,dvc_mac,observer.mac,string
Network_Traffic,All_Traffic,dvc_zone,observer.egress.zone,string
Network_Traffic,All_Traffic,packets,network.packets,number
Network_Traffic,All_Traffic,packets_in,source.packets,number
Network_Traffic,All_Traffic,packets_out,destination.packets,number
Network_Traffic,All_Traffic,protocol,network.protocol,string
Network_Traffic,All_Traffic,rule,rule.name,string
Network_Traffic,All_Traffic,src,source.address,string
Network_Traffic,All_Traffic,src_ip,source.ip,string
Network_Traffic,All_Traffic,src_mac,source.mac,string
Network_Traffic,All_Traffic,src_port,source.port,number
Network_Traffic,All_Traffic,src_translated_ip,source.nat.ip,string
Network_Traffic,All_Traffic,src_translated_port,source.nat.port,number
Network_Traffic,All_Traffic,transport,network.transport,string
Network_Traffic,All_Traffic,vlan,vlan.name,string
Vulnerabilities,Vulnerabilities,category,vulnerability.category,string
Vulnerabilities,Vulnerabilities,cve,vulnerability.id,string
Vulnerabilities,Vulnerabilities,cvss,vulnerability.score.base,number
Vulnerabilities,Vulnerabilities,dest,host.name,string
Vulnerabilities,Vulnerabilities,dvc,vulnerability.scanner.vendor,string
Vulnerabilities,Vulnerabilities,severity,vulnerability.severity,string
Vulnerabilities,Vulnerabilities,url,vulnerability.reference,string
Vulnerabilities,Vulnerabilities,user,related.user,string
Vulnerabilities,Vulnerabilities,vendor_product,vulnerability.scanner.vendor,string
Endpoint,Ports,creation_time,@timestamp,timestamp
Endpoint,Ports,dest_port,destination.port,number
Endpoint,Ports,process_id,process.pid,string
Endpoint,Ports,transport,network.transport,string
Endpoint,Ports,transport_dest_port,destination.port,string
Endpoint,Processes,action,event.action,string
Endpoint,Processes,os,os.full,string
Endpoint,Processes,parent_process_exec,process.parent.name,string
Endpoint,Processes,parent_process_id,process.ppid,number
Endpoint,Processes,parent_process_guid,process.parent.entity_id,string
Endpoint,Processes,parent_process_path,process.parent.executable,string
Endpoint,Processes,process_current_directory,process.parent.working_directory,
Endpoint,Processes,process_exec,process.name,string
Endpoint,Processes,process_hash,process.hash.*,string
Endpoint,Processes,process_guid,process.entity_id,string
Endpoint,Processes,process_id,process.pid,number
Endpoint,Processes,process_path,process.executable,string
Endpoint,Processes,user_id,related.user,string
Endpoint,Services,description,service.name,string
Endpoint,Services,process_id,service.id,string
Endpoint,Services,service_dll,dll.name,string
Endpoint,Services,service_dll_path,dll.path,string
Endpoint,Services,service_dll_hash,dll.hash.*,string
Endpoint,Services,service_dll_signature_exists,dll.code_signature.exists,boolean
Endpoint,Services,service_dll_signature_verified,dll.code_signature.valid,boolean
Endpoint,Services,service_exec,service.name,string
Endpoint,Services,service_hash,hash.*,string
Endpoint,Filesystem,file_access_time,file.accessed,timestamp
Endpoint,Filesystem,file_create_time,file.created,timestamp
Endpoint,Filesystem,file_modify_time,file.mtime,timestamp
Endpoint,Filesystem,process_id,process.pid,string
Endpoint,Registry,process_id,process.id,string
Web,Web,action,event.action,string
Web,Web,app,observer.product,string
Web,Web,bytes_in,http.request.bytes,number
Web,Web,bytes_out,http.response.bytes,number
Web,Web,dest,destination.ip,string
Web,Web,duration,event.duration,number
Web,Web,http_method,http.request.method,string
Web,Web,http_referrer,http.request.referrer,string
Web,Web,http_user_agent,user_agent.name,string
Web,Web,status,http.response.status_code,string
Web,Web,url,url.full,string
Web,Web,user,url.username,string
Web,Web,vendor_product,observer.product,string`;
Loading

0 comments on commit c1d976b

Please sign in to comment.