Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dataflowengineoss] Turn Semantics into a node-directed trait #4920

Merged
merged 4 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.joern.dataflowengineoss

import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, PassThroughMapping, Semantics}
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, PassThroughMapping, FullNameSemantics}
import io.shiftleft.codepropertygraph.generated.Operators

import scala.annotation.unused
Expand All @@ -10,9 +10,9 @@ object DefaultSemantics {
/** @return
* a default set of common external procedure calls for all languages.
*/
def apply(): Semantics = {
def apply(): FullNameSemantics = {
val list = operatorFlows ++ cFlows ++ javaFlows
Semantics.fromList(list)
FullNameSemantics.fromList(list)
}

private def F = (x: String, y: List[(Int, Int)]) => FlowSemantic.from(x, y)
Expand Down Expand Up @@ -157,6 +157,6 @@ object DefaultSemantics {
* procedure semantics for operators and common external Java calls only.
*/
@unused
def javaSemantics(): Semantics = Semantics.fromList(operatorFlows ++ javaFlows)
def javaSemantics(): FullNameSemantics = FullNameSemantics.fromList(operatorFlows ++ javaFlows)

}
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ class ExpressionMethods[NodeType <: Expression](val node: NodeType) extends AnyV
/** Retrieve flow semantic for the call this argument is a part of.
*/
def semanticsForCallByArg(implicit semantics: Semantics): Iterator[FlowSemantic] = {
argToMethods(node).flatMap { method =>
semantics.forMethod(method.fullName)
}
argToMethods(node).flatMap(semantics.forMethod)
}

private def argToMethods(arg: Expression): Iterator[Method] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package io.joern.dataflowengineoss.layers.dataflows

import io.joern.dataflowengineoss.DefaultSemantics
import io.joern.dataflowengineoss.passes.reachingdef.ReachingDefPass
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics}
import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, FullNameSemantics, Semantics}
import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext, LayerCreatorOptions}

object OssDataFlow {
Expand All @@ -18,7 +18,7 @@ class OssDataFlowOptions(
) extends LayerCreatorOptions {}

class OssDataFlow(opts: OssDataFlowOptions)(implicit
s: Semantics = Semantics.fromList(DefaultSemantics().elements ++ opts.extraFlows)
s: Semantics = FullNameSemantics.fromList(DefaultSemantics().elements ++ opts.extraFlows)
) extends LayerCreator {

override val overlayName: String = OssDataFlow.overlayName
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.joern.dataflowengineoss.passes.reachingdef

import io.joern.dataflowengineoss.language.*
import io.joern.dataflowengineoss.queryengine.Engine.isOutputArgOfInternalMethod
import io.joern.dataflowengineoss.queryengine.Engine.{isOutputArgOfInternalMethod, semanticsForCall}
import io.joern.dataflowengineoss.semanticsloader.{
FlowMapping,
FlowPath,
Expand Down Expand Up @@ -50,7 +50,7 @@ object EdgeValidator {
*/
private def isCallRetval(parentNode: StoredNode)(implicit semantics: Semantics): Boolean =
parentNode match {
case call: Call => semantics.forMethod(call.methodFullName).exists(!explicitlyFlowsToReturnValue(_))
case call: Call => semanticsForCall(call).exists(!explicitlyFlowsToReturnValue(_))
case _ => false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class ReachingDefPass(cpg: Cpg, maxNumberOfDefinitions: Int = 4000)(implicit s:

private val logger: Logger = LoggerFactory.getLogger(this.getClass)
// If there are any regex method full names, load them early
s.loadRegexSemantics(cpg)
s.initialize(cpg)

override def generateParts(): Array[Method] = cpg.method.toArray

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,7 @@ object Engine {
}

def semanticsForCall(call: Call)(implicit semantics: Semantics): List[FlowSemantic] = {
Engine.methodsForCall(call).flatMap { method =>
semantics.forMethod(method.fullName)
}
Engine.methodsForCall(call).flatMap(semantics.forMethod)
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package io.joern.dataflowengineoss.queryengine
import io.joern.dataflowengineoss.queryengine.QueryEngineStatistics.{PATH_CACHE_HITS, PATH_CACHE_MISSES}
import io.joern.dataflowengineoss.semanticsloader.Semantics
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.semanticcpg.language.{toCfgNodeMethods, toExpressionMethods, _}
import io.shiftleft.semanticcpg.language.*

import java.util.concurrent.Callable
import scala.collection.mutable
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package io.joern.dataflowengineoss.semanticsloader

import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Method
import io.shiftleft.semanticcpg.language.*

import scala.collection.mutable

/** Factory methods for [[FullNameSemantics]]. */
object FullNameSemantics {

  /** Builds a [[FullNameSemantics]] whose lookup table is keyed by each element's `methodFullName`.
    *
    * @param elements
    *   the flow semantics to index; if several elements share a `methodFullName`, the one appearing last in the list
    *   is the one kept.
    */
  def fromList(elements: List[FlowSemantic]): FullNameSemantics = {
    val byFullName = mutable.Map.newBuilder[String, FlowSemantic]
    elements.foreach { semantic =>
      byFullName.addOne(semantic.methodFullName -> semantic)
    }
    new FullNameSemantics(byFullName.result())
  }

  /** @return
    *   a [[FullNameSemantics]] containing no flow semantics.
    */
  def empty: FullNameSemantics = fromList(Nil)

}

/** A [[Semantics]] implementation backed by a table from method full name (or regex string) to [[FlowSemantic]].
  *
  * @param methodToSemantic
  *   the lookup table; for entries flagged as `regex`, the key is the regex string itself.
  */
class FullNameSemantics private (methodToSemantic: mutable.Map[String, FlowSemantic]) extends Semantics {

  /** Maps a concrete method full name to the regex string that matched it, e.g.
    *
    * `path/to/file.py:<module>.Foo.sink` -> `^path.*Foo\\.sink$`
    *
    * Filled in by [[initialize]].
    */
  private val regexMatchedFullNames = mutable.HashMap.empty[String, String]

  /** Resolves every regex-flagged semantic against the method full names in `cpg` and records the matches, so that
    * lookups at query time do not need to evaluate regexes.
    */
  override def initialize(cpg: Cpg): Unit = {
    import io.shiftleft.semanticcpg.language._

    val regexSemantics = methodToSemantic.filter { case (_, semantic) => semantic.regex }
    for {
      regexString <- regexSemantics.keysIterator
      methodMatch <- cpg.method.fullName(regexString).fullName
    } regexMatchedFullNames.update(methodMatch, regexString)
  }

  /** @return
    *   all flow semantics held in the lookup table.
    */
  def elements: List[FlowSemantic] = methodToSemantic.valuesIterator.toList

  /** Looks up the semantic for `fullName`. If the name was recorded as matching a regex entry, the table is queried
    * with that regex string; otherwise it is queried with `fullName` itself.
    */
  private def forMethod(fullName: String): Option[FlowSemantic] = {
    val lookupKey = regexMatchedFullNames.getOrElse(fullName, fullName)
    methodToSemantic.get(lookupKey)
  }

  /** Looks up the semantic for the given method node via its full name. */
  override def forMethod(method: Method): Option[FlowSemantic] = forMethod(method.fullName)

  /** Renders the semantics as text, one line per element, ordered by `methodFullName`. Each line is the quoted full
    * name followed by the space-separated `src -> dst` pairs of its [[FlowMapping]] entries; mappings that are not a
    * [[FlowMapping]] are omitted.
    */
  def serialize: String = {
    val lines = elements.sortBy(_.methodFullName).map { semantic =>
      val renderedMappings = semantic.mappings
        .collect { case FlowMapping(src, dst) => s"$src -> $dst" }
        .mkString(" ")
      s"\"${semantic.methodFullName}\" " + renderedMappings
    }
    lines.mkString("\n")
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package io.joern.dataflowengineoss.semanticsloader

import io.joern.dataflowengineoss.SemanticsParser.MappingContext
import io.joern.dataflowengineoss.{SemanticsBaseListener, SemanticsLexer, SemanticsParser}
import org.antlr.v4.runtime.tree.ParseTreeWalker
import org.antlr.v4.runtime.{CharStream, CharStreams, CommonTokenStream}

import scala.collection.mutable
import scala.jdk.CollectionConverters.*

/** Parses textual flow-semantic definitions into [[FlowSemantic]] values using the ANTLR-generated
  * `SemanticsLexer` / `SemanticsParser`.
  */
class FullNameSemanticsParser {

  /** Parses semantics given directly as a string. */
  def parse(input: String): List[FlowSemantic] =
    parseCharStream(CharStreams.fromString(input))

  /** Parses semantics read from the file at `fileName`. */
  def parseFile(fileName: String): List[FlowSemantic] =
    parseCharStream(CharStreams.fromFileName(fileName))

  /** Lexes and parses `charStream`, walks the resulting tree with a [[Listener]] and returns what it collected. */
  private def parseCharStream(charStream: CharStream): List[FlowSemantic] = {
    val tokens    = new CommonTokenStream(new SemanticsLexer(charStream))
    val parser    = new SemanticsParser(tokens)
    val walker    = new ParseTreeWalker()
    val parseTree = parser.taintSemantics()
    val collector = new Listener()
    walker.walk(collector, parseTree)
    collector.result.toList
  }

  /** Convenience accessors on a parsed `mapping` rule. */
  implicit class AntlrFlowExtensions(val ctx: MappingContext) {

    /** True when the mapping carries the `PASSTHROUGH` token. */
    def isPassThrough: Boolean = Option(ctx.PASSTHROUGH()).nonEmpty

    /** Argument index on the source side of the mapping. */
    def srcIdx: Int = ctx.src().argIdx().NUMBER().getText.toInt

    /** Argument name on the source side, if one was given. */
    def srcArgName: Option[String] = Option(ctx.src().argName()).map(_.name().getText)

    /** Argument index on the destination side of the mapping. */
    def dstIdx: Int = ctx.dst().argIdx().NUMBER().getText.toInt

    /** Argument name on the destination side, if one was given. */
    def dstArgName: Option[String] = Option(ctx.dst().argName()).map(_.name().getText)

  }

  /** Tree listener that turns every `singleSemantic` under the root rule into a [[FlowSemantic]]. */
  private class Listener extends SemanticsBaseListener {

    /** Semantics collected so far, in the order they were encountered. */
    val result: mutable.ListBuffer[FlowSemantic] = mutable.ListBuffer.empty[FlowSemantic]

    override def enterTaintSemantics(ctx: SemanticsParser.TaintSemanticsContext): Unit =
      for (single <- ctx.singleSemantic().asScala) {
        val fullName  = single.methodName().name().getText
        val flowPaths = single.mapping().asScala.toList.map(ctxToParamMapping)
        result += FlowSemantic(fullName, flowPaths)
      }

    /** Converts one parsed mapping into either the pass-through marker or a source-to-destination [[FlowMapping]]. */
    private def ctxToParamMapping(ctx: MappingContext): FlowPath =
      if (ctx.isPassThrough) PassThroughMapping
      else
        FlowMapping(
          ParameterNode(ctx.srcIdx, ctx.srcArgName),
          ParameterNode(ctx.dstIdx, ctx.dstArgName)
        )

  }

}
Loading
Loading