Skip to content

Commit

Permalink
[dataflowengineoss] Turn Semantics into a node-directed trait (#4920)
Browse files Browse the repository at this point in the history
* Refactor `Semantics` -> `FullNameSemantics`

* Refactor `FlowSemantic`/`FlowNode`/`FlowPath` out of the FullNameSemantics parser

* Rename `Parser` -> `FullNameSemanticsParser`

* Introduce `Semantics` trait with `initialize` and `forMethod`
  • Loading branch information
xavierpinho authored Sep 17, 2024
1 parent 19011fb commit 3a530c0
Show file tree
Hide file tree
Showing 32 changed files with 301 additions and 271 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.joern.dataflowengineoss

import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, PassThroughMapping, Semantics}
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, PassThroughMapping, FullNameSemantics}
import io.shiftleft.codepropertygraph.generated.Operators

import scala.annotation.unused
Expand All @@ -10,9 +10,9 @@ object DefaultSemantics {
/** @return
* a default set of common external procedure calls for all languages.
*/
def apply(): Semantics = {
def apply(): FullNameSemantics = {
val list = operatorFlows ++ cFlows ++ javaFlows
Semantics.fromList(list)
FullNameSemantics.fromList(list)
}

private def F = (x: String, y: List[(Int, Int)]) => FlowSemantic.from(x, y)
Expand Down Expand Up @@ -157,6 +157,6 @@ object DefaultSemantics {
* procedure semantics for operators and common external Java calls only.
*/
@unused
def javaSemantics(): Semantics = Semantics.fromList(operatorFlows ++ javaFlows)
def javaSemantics(): FullNameSemantics = FullNameSemantics.fromList(operatorFlows ++ javaFlows)

}
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ class ExpressionMethods[NodeType <: Expression](val node: NodeType) extends AnyV
/** Retrieve flow semantic for the call this argument is a part of.
*/
def semanticsForCallByArg(implicit semantics: Semantics): Iterator[FlowSemantic] = {
argToMethods(node).flatMap { method =>
semantics.forMethod(method.fullName)
}
argToMethods(node).flatMap(semantics.forMethod)
}

private def argToMethods(arg: Expression): Iterator[Method] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package io.joern.dataflowengineoss.layers.dataflows

import io.joern.dataflowengineoss.DefaultSemantics
import io.joern.dataflowengineoss.passes.reachingdef.ReachingDefPass
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, FullNameSemantics, Semantics}
import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext, LayerCreatorOptions}

object OssDataFlow {
Expand All @@ -18,7 +18,7 @@ class OssDataFlowOptions(
) extends LayerCreatorOptions {}

class OssDataFlow(opts: OssDataFlowOptions)(implicit
s: Semantics = Semantics.fromList(DefaultSemantics().elements ++ opts.extraFlows)
s: Semantics = FullNameSemantics.fromList(DefaultSemantics().elements ++ opts.extraFlows)
) extends LayerCreator {

override val overlayName: String = OssDataFlow.overlayName
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.joern.dataflowengineoss.passes.reachingdef

import io.joern.dataflowengineoss.language.*
import io.joern.dataflowengineoss.queryengine.Engine.isOutputArgOfInternalMethod
import io.joern.dataflowengineoss.queryengine.Engine.{isOutputArgOfInternalMethod, semanticsForCall}
import io.joern.dataflowengineoss.semanticsloader.{
FlowMapping,
FlowPath,
Expand Down Expand Up @@ -50,7 +50,7 @@ object EdgeValidator {
*/
private def isCallRetval(parentNode: StoredNode)(implicit semantics: Semantics): Boolean =
parentNode match {
case call: Call => semantics.forMethod(call.methodFullName).exists(!explicitlyFlowsToReturnValue(_))
case call: Call => semanticsForCall(call).exists(!explicitlyFlowsToReturnValue(_))
case _ => false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ReachingDefPass(cpg: Cpg, maxNumberOfDefinitions: Int = 4000)(implicit s:

private val logger: Logger = LoggerFactory.getLogger(this.getClass)
// If there are any regex method full names, load them early
s.loadRegexSemantics(cpg)
s.initialize(cpg)

override def generateParts(): Array[Method] = cpg.method.toArray

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,7 @@ object Engine {
}

def semanticsForCall(call: Call)(implicit semantics: Semantics): List[FlowSemantic] = {
Engine.methodsForCall(call).flatMap { method =>
semantics.forMethod(method.fullName)
}
Engine.methodsForCall(call).flatMap(semantics.forMethod)
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package io.joern.dataflowengineoss.queryengine
import io.joern.dataflowengineoss.queryengine.QueryEngineStatistics.{PATH_CACHE_HITS, PATH_CACHE_MISSES}
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.semanticcpg.language.{toCfgNodeMethods, toExpressionMethods, _}
import io.shiftleft.semanticcpg.language.*

import java.util.concurrent.Callable
import scala.collection.mutable
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package io.joern.dataflowengineoss.semanticsloader

import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Method
import io.shiftleft.semanticcpg.language.*

import scala.collection.mutable

/** Factory methods for [[FullNameSemantics]].
  */
object FullNameSemantics {

  /** Creates a [[FullNameSemantics]] whose lookup table is keyed by the `methodFullName` of each given element.
    *
    * When several elements share the same full name, the one that occurs last in `elements` is the one retained.
    *
    * @param elements
    *   the flow semantics to index.
    */
  def fromList(elements: List[FlowSemantic]): FullNameSemantics = {
    val byFullName = mutable.Map.from(elements.map(semantic => semantic.methodFullName -> semantic))
    new FullNameSemantics(byFullName)
  }

  /** A [[FullNameSemantics]] holding no flow semantics at all.
    */
  def empty: FullNameSemantics = fromList(Nil)

}

/** A [[Semantics]] implementation that resolves the [[FlowSemantic]] of a method through its full name.
  *
  * Keys of `methodToSemantic` are either literal method full names or, for entries whose `regex` flag is set, regular
  * expressions that are resolved against the graph by [[initialize]].
  */
class FullNameSemantics private (methodToSemantic: mutable.Map[String, FlowSemantic]) extends Semantics {

  /** Maps a concrete method full name to the regex key that matched it. e.g.
    *
    * `path/to/file.py:<module>.Foo.sink` -> `^path.*Foo\\.sink$`
    */
  private val regexMatchedFullNames = mutable.HashMap.empty[String, String]

  /** For every regex-flagged entry, records which method full names in `cpg` the regex matches, so that later lookups
    * by full name can be answered from `regexMatchedFullNames`.
    */
  override def initialize(cpg: Cpg): Unit = {
    import io.shiftleft.semanticcpg.language.*

    val regexKeys = methodToSemantic.iterator.collect {
      case (pattern, semantic) if semantic.regex => pattern
    }
    regexKeys.foreach { pattern =>
      cpg.method.fullName(pattern).fullName.foreach { matchedName =>
        regexMatchedFullNames.put(matchedName, pattern)
      }
    }
  }

  /** All flow semantics currently held, in no particular order.
    */
  def elements: List[FlowSemantic] = methodToSemantic.values.toList

  /** Looks up the semantic for `fullName`. If the name was recorded as a regex match, the regex string is used as the
    * lookup key; otherwise the full name itself is.
    */
  private def forMethod(fullName: String): Option[FlowSemantic] = {
    val lookupKey = regexMatchedFullNames.getOrElse(fullName, fullName)
    methodToSemantic.get(lookupKey)
  }

  /** Looks up the semantic for the given method node by its `fullName`.
    */
  override def forMethod(method: Method): Option[FlowSemantic] = forMethod(method.fullName)

  /** Renders the held semantics as text, one line per method, sorted by method full name. Each line is the quoted
    * full name followed by the space-separated `src -> dst` pairs of its [[FlowMapping]] entries; mappings of any
    * other kind are left out.
    */
  def serialize: String = {
    val lines = for (semantic <- elements.sortBy(_.methodFullName)) yield {
      val flows = semantic.mappings.collect { case FlowMapping(src, dst) => s"$src -> $dst" }
      s"\"${semantic.methodFullName}\" " + flows.mkString(" ")
    }
    lines.mkString("\n")
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package io.joern.dataflowengineoss.semanticsloader

import io.joern.dataflowengineoss.SemanticsParser.MappingContext
import io.joern.dataflowengineoss.{SemanticsBaseListener, SemanticsLexer, SemanticsParser}
import org.antlr.v4.runtime.tree.ParseTreeWalker
import org.antlr.v4.runtime.{CharStream, CharStreams, CommonTokenStream}

import scala.collection.mutable
import scala.jdk.CollectionConverters.*

/** Parses textual flow-semantics definitions into [[FlowSemantic]] values using the ANTLR-generated
  * `SemanticsLexer`/`SemanticsParser`.
  */
class FullNameSemanticsParser {

  /** Parses semantics given directly as a string.
    */
  def parse(input: String): List[FlowSemantic] =
    parseCharStream(CharStreams.fromString(input))

  /** Parses semantics read from the file at `fileName`.
    */
  def parseFile(fileName: String): List[FlowSemantic] =
    parseCharStream(CharStreams.fromFileName(fileName))

  /** Lexes and parses the stream, then walks the resulting tree with a [[Listener]] and returns what it collected.
    */
  private def parseCharStream(charStream: CharStream): List[FlowSemantic] = {
    val tokens    = new CommonTokenStream(new SemanticsLexer(charStream))
    val parseTree = new SemanticsParser(tokens).taintSemantics()

    val collector = new Listener()
    new ParseTreeWalker().walk(collector, parseTree)
    collector.result.toList
  }

  /** Convenience accessors over a mapping parse-tree node.
    */
  implicit class AntlrFlowExtensions(val ctx: MappingContext) {

    /** True if the mapping carries a `PASSTHROUGH` token. */
    def isPassThrough: Boolean = Option(ctx.PASSTHROUGH()).nonEmpty

    /** Numeric index of the source argument. */
    def srcIdx: Int = ctx.src().argIdx().NUMBER().getText.toInt

    /** Name of the source argument, if one was given. */
    def srcArgName: Option[String] = Option(ctx.src().argName()).map(_.name().getText)

    /** Numeric index of the destination argument. */
    def dstIdx: Int = ctx.dst().argIdx().NUMBER().getText.toInt

    /** Name of the destination argument, if one was given. */
    def dstArgName: Option[String] = Option(ctx.dst().argName()).map(_.name().getText)

  }

  /** Tree listener that accumulates one [[FlowSemantic]] per `singleSemantic` entry, in order of appearance.
    */
  private class Listener extends SemanticsBaseListener {

    val result: mutable.ListBuffer[FlowSemantic] = mutable.ListBuffer[FlowSemantic]()

    override def enterTaintSemantics(ctx: SemanticsParser.TaintSemanticsContext): Unit = {
      for (entry <- ctx.singleSemantic().asScala) {
        val fullName = entry.methodName().name().getText
        val paths    = entry.mapping().asScala.iterator.map(toFlowPath).toList
        result += FlowSemantic(fullName, paths)
      }
    }

    /** Converts a mapping node into either [[PassThroughMapping]] or a [[FlowMapping]] between two parameter nodes.
      */
    private def toFlowPath(ctx: MappingContext): FlowPath =
      if (ctx.isPassThrough) PassThroughMapping
      else {
        val source      = ParameterNode(ctx.srcIdx, ctx.srcArgName)
        val destination = ParameterNode(ctx.dstIdx, ctx.dstArgName)
        FlowMapping(source, destination)
      }

  }

}
Loading

0 comments on commit 3a530c0

Please sign in to comment.