Skip to content

Commit

Permalink
FactorGenerationStage: Remove messageIds factor table
Browse files: browse the repository at this point in the history
  • Loading branch information
szarnyasg committed Aug 28, 2023
1 parent 6d59514 commit 829b7a0
Showing 1 changed file with 0 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,22 +164,6 @@ object FactorGenerationStage extends DatagenStage with Logging {
import model.raw._

private val rawFactors = Map(
"messageIds" -> Factor(CommentType, PostType) { case Seq(comments, posts) =>
  // Factor table of message ids (comments and posts combined) together with
  // their creation and deletion timestamps truncated to the day, randomly
  // down-sampled.

  // Desired sample size; used only to derive the sampling fraction below, so
  // the number of rows actually returned is presumably approximate rather
  // than exact -- TODO confirm against Dataset.sample semantics.
  val targetRows = 200.0

  // Project both inputs onto the same three columns so they can be combined.
  val commentRows = comments.select($"creationDate", $"deletionDate", $"id".as("MessageId"))
  val postRows = posts.select($"creationDate", $"deletionDate", $"id".as("MessageId"))

  val dailyMessages = (commentRows |+| postRows)
    .select(
      date_trunc("day", $"creationDate").as("creationDay"),
      date_trunc("day", $"deletionDate").as("deletionDay"),
      $"MessageId"
    )
    .orderBy($"MessageId")

  // Cap the fraction at 1.0 for inputs with fewer rows than the target.
  val fraction = Math.min(targetRows / dailyMessages.count(), 1.0)

  // Fixed seed (42), same as before.
  dailyMessages.sample(fraction, 42)
},
"countryNumPersons" -> Factor(PlaceType, PersonType) { case Seq(places, persons) =>
val cities = places.where($"type" === "City").cache()
val countries = places.where($"type" === "Country").cache()
Expand Down

0 comments on commit 829b7a0

Please sign in to comment.