-
Notifications
You must be signed in to change notification settings - Fork 80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Node range #194
Node range #194
Changes from 45 commits
65a489d
4d88dde
b681d84
b6ea814
2c9e535
c3413c5
b5ab209
84badf7
8b006e5
32a6917
8d376d9
95400d8
b858836
4ca400d
02192f5
8d15d54
8798c06
7badac3
5523bc7
abbb4f1
4194258
dfcb5eb
a4eceee
38e705b
734a2a9
da2faf4
d4e60b3
c47517d
f4bea6e
e2b9750
70b820e
3d7ae17
5844f8d
1f4999a
beab3d2
c1df042
383425e
2ba6bef
edc24d3
396ab20
2186e89
8a64df8
5898169
cbb70f0
f2a96a6
64219d2
66716c0
bd76ac1
95c33e5
bf5914b
5f86b14
37ae185
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package astminer.common | ||
|
||
import astminer.common.model.Node | ||
import astminer.common.model.NodeRange | ||
|
||
/**
 * Simplest [Node] implementation: a tree node whose children are themselves [SimpleNode]s.
 *
 * The overrides below narrow the results of the base-class lookups and traversals
 * to [SimpleNode] so callers do not have to cast.
 *
 * @property typeLabel type label of this node.
 * @property children mutable list of direct children; modified in place by [removeChildrenOfType].
 * @property parent parent node, or `null` (the default).
 * @property range source range of the node, or `null` (the default).
 * @param token raw token text forwarded to the [Node] constructor; may be `null`.
 */
class SimpleNode(
    override val typeLabel: String,
    override val children: MutableList<SimpleNode>,
    override val parent: Node? = null,
    override val range: NodeRange? = null,
    token: String?
) : Node(token) {
    /** Removes, in place, every direct child whose type label equals [typeLabel]. */
    override fun removeChildrenOfType(typeLabel: String) {
        // Inside the lambda `typeLabel` is the function parameter, `child.typeLabel` the child's own label.
        children.removeIf { child -> child.typeLabel == typeLabel }
    }

    /** Delegates to the base implementation and casts every returned node to [SimpleNode]. */
    override fun getChildrenOfType(typeLabel: String) =
        super.getChildrenOfType(typeLabel).map { node -> node as SimpleNode }

    /** Delegates to the base implementation; yields `null` when the result is not a [SimpleNode]. */
    override fun getChildOfType(typeLabel: String) =
        super.getChildOfType(typeLabel) as? SimpleNode

    /** Base-class pre-order traversal with every element cast to [SimpleNode]. */
    override fun preOrder() = super.preOrder().map { node -> node as SimpleNode }

    /** Base-class post-order traversal with every element cast to [SimpleNode]. */
    override fun postOrder() = super.postOrder().map { node -> node as SimpleNode }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package astminer.common | ||
|
||
/**
 * Helpers that turn a raw token into a delimiter-joined sequence of lowercase sub-tokens.
 */
object TokenNormalization {
    /** Returned by [normalizeToken] when the token is `null` or yields no sub-tokens. */
    const val EMPTY_TOKEN = "<E>"

    /** Separator placed between sub-tokens by [normalizeToken]. */
    const val TOKEN_DELIMITER = "|"

    // A literal backslash followed by 'n', i.e. an escaped new line inside the token text.
    private val escapedNewLinePattern = Regex("\\\\n")

    // NOTE(review): this matches the literal text "//" followed by one or more 's' characters,
    // not the whitespace class `\s+`. The functions below are documented as mirroring code2vec,
    // so the pattern is presumably kept verbatim for parity; confirm before changing it.
    private val whitespacePattern = Regex("//s+")
    private val quotesApostrophesCommasPattern = Regex("[\"',]")
    private val nonPrintablePattern = Regex("\\P{Print}")
    private val nonLetterPattern = Regex("[^A-Za-z]")

    // Applied in this order by normalizeSubToken; every match is replaced with an empty string.
    private val cleanupPatterns = listOf(
        escapedNewLinePattern,
        whitespacePattern,
        quotesApostrophesCommasPattern,
        nonPrintablePattern
    )

    // Splits on camelCase boundaries, underscores, single digits, acronym-to-word boundaries and whitespace.
    private val subTokenBoundary = Regex("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+")

    /**
     * Normalizes [token] into its sub-tokens joined with [TOKEN_DELIMITER].
     *
     * @return the joined sub-tokens, or [EMPTY_TOKEN] when [token] is `null` or produces no sub-tokens.
     */
    fun normalizeToken(token: String?): String =
        token
            ?.let { raw -> splitToSubtokens(raw) }
            ?.takeIf { parts -> parts.isNotEmpty() }
            ?.joinToString(TOKEN_DELIMITER)
            ?: EMPTY_TOKEN

    /**
     * Trims [token], splits it into sub-tokens, normalizes each one and drops the empty results.
     *
     * The function was adopted from the original code2vec implementation in order to match their behavior:
     * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
     */
    fun splitToSubtokens(token: String): List<String> =
        subTokenBoundary.split(token.trim())
            .map { part -> normalizeSubToken(part, "") }
            .filter { part -> part.isNotEmpty() }

    /**
     * Lowercases [token] and strips it down to its ASCII letters.
     *
     * If no letters remain, the cleaned token with spaces replaced by underscores is returned instead;
     * if that is empty as well, [defaultToken] is returned.
     *
     * The function was adopted from the original code2vec implementation in order to match their behavior:
     * https://github.com/tech-srl/code2vec/blob/master/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
     */
    fun normalizeSubToken(token: String, defaultToken: String): String {
        val cleaned = cleanupPatterns.fold(token.lowercase()) { text, pattern -> text.replace(pattern, "") }

        val lettersOnly = cleaned.replace(nonLetterPattern, "")
        if (lettersOnly.isNotEmpty()) return lettersOnly

        // Nothing alphabetic survived: fall back to the cleaned text, keeping digits and symbols.
        val underscored = cleaned.replace(" ", "_")
        return if (underscored.isEmpty()) defaultToken else underscored
    }
}
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package astminer.common.model | ||
|
||
import astminer.common.TokenNormalization | ||
|
||
/**
 * A token that keeps the raw text together with its derived representations.
 *
 * @property original raw token text; may be `null`.
 */
class Token(val original: String?) {
    /** Result of [TokenNormalization.normalizeToken] for [original], computed once at construction. */
    val normalized: String = TokenNormalization.normalizeToken(original)

    /** Optional override; while non-null it takes precedence over [normalized] in [final]. */
    var technical: String? = null

    /**
     * The representation to use for this token: [technical] when it is set, otherwise [normalized].
     *
     * Evaluated on every access, so it follows later assignments to [technical].
     *
     * Review note: a reviewer suggested declaring this as a function
     * (`fun final() = technical ?: normalized`), since a property reads as if it were fixed
     * when the instance is created. It is kept as a property here.
     */
    val final: String
        get() = technical ?: normalized

    /** Same value as [final]. */
    override fun toString(): String = technical ?: normalized
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where do you use vars in data class?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I use `var` in the `Token` class. For some reason, without this option detekt suggests that I use a data class, and when I do, detekt reports the `var` usage in `Token`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As far as I can see, `Token` is not a data class at the moment, and detekt doesn't report anything.