From 57e518cae27a52e37e405d2df5ee6773bc8b8e64 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Tue, 6 Jun 2023 09:56:12 +0200 Subject: [PATCH] Check TASTy UUIDs in classfiles [Cherry-picked f9e8b366fb8ddb516c08f9b19f4220a0deb20d47] --- .../dotty/tools/dotc/core/SymbolLoaders.scala | 22 +- .../dotc/core/classfile/ClassfileParser.scala | 375 +++++++++--------- .../classfile/ClassfileTastyUUIDParser.scala | 116 ++++++ .../src/dotty/tools/io/AbstractFile.scala | 2 +- 4 files changed, 320 insertions(+), 195 deletions(-) create mode 100644 compiler/src/dotty/tools/dotc/core/classfile/ClassfileTastyUUIDParser.scala diff --git a/compiler/src/dotty/tools/dotc/core/SymbolLoaders.scala b/compiler/src/dotty/tools/dotc/core/SymbolLoaders.scala index 8fe7b3451186..734688ea2ec8 100644 --- a/compiler/src/dotty/tools/dotc/core/SymbolLoaders.scala +++ b/compiler/src/dotty/tools/dotc/core/SymbolLoaders.scala @@ -14,7 +14,7 @@ import dotty.tools.backend.jvm.DottyBackendInterface.symExtensions import Contexts._, Symbols._, Flags._, SymDenotations._, Types._, Scopes._, Names._ import NameOps._ import StdNames._ -import classfile.ClassfileParser +import classfile.{ClassfileParser, ClassfileTastyUUIDParser} import Decorators._ import util.Stats @@ -24,6 +24,7 @@ import ast.desugar import parsing.JavaParsers.OutlineJavaParser import parsing.Parsers.OutlineParser +import dotty.tools.tasty.TastyHeaderUnpickler object SymbolLoaders { @@ -421,14 +422,25 @@ class TastyLoader(val tastyFile: AbstractFile) extends SymbolLoader { override def doComplete(root: SymDenotation)(using Context): Unit = val (classRoot, moduleRoot) = rootDenots(root.asClass) - val unpickler = - val tastyBytes = tastyFile.toByteArray - new tasty.DottyUnpickler(tastyBytes) + val tastyBytes = tastyFile.toByteArray + val unpickler = new tasty.DottyUnpickler(tastyBytes) unpickler.enter(roots = Set(classRoot, moduleRoot, moduleRoot.sourceModule))(using ctx.withSource(util.NoSource)) if mayLoadTreesFromTasty then classRoot.classSymbol.rootTreeOrProvider = unpickler moduleRoot.classSymbol.rootTreeOrProvider = unpickler - // TODO check TASTy UUID matches classfile + checkTastyUUID(tastyFile, tastyBytes) + + + private def checkTastyUUID(tastyFile: AbstractFile, tastyBytes: Array[Byte])(using Context): Unit = + var classfile = tastyFile.resolveSibling(tastyFile.name.stripSuffix(".tasty") + ".class") + if classfile == null then + classfile = tastyFile.resolveSibling(tastyFile.name.stripSuffix(".tasty") + "$.class") + if classfile != null then + val tastyUUID = new TastyHeaderUnpickler(tastyBytes).readHeader() + new ClassfileTastyUUIDParser(classfile)(ctx).checkTastyUUID(tastyUUID) + else + // This will be the case in any of our tests that compile with `-Youtput-only-tasty` + report.inform(s"No classfiles found for $tastyFile when checking TASTy UUID") private def mayLoadTreesFromTasty(using Context): Boolean = ctx.settings.YretainTrees.value || ctx.settings.fromTasty.value diff --git a/compiler/src/dotty/tools/dotc/core/classfile/ClassfileParser.scala b/compiler/src/dotty/tools/dotc/core/classfile/ClassfileParser.scala index b3a2aaa87193..5e816502f359 100644 --- a/compiler/src/dotty/tools/dotc/core/classfile/ClassfileParser.scala +++ b/compiler/src/dotty/tools/dotc/core/classfile/ClassfileParser.scala @@ -26,6 +26,9 @@ import scala.util.control.NonFatal import dotty.tools.dotc.classpath.FileUtils.classToTasty object ClassfileParser { + + import ClassfileConstants._ + /** Marker trait for unpicklers that can be embedded in classfiles. */ trait Embedded @@ -51,6 +54,177 @@ object ClassfileParser { mapOver(tp) } } + + abstract class AbstractConstantPool(using in: DataReader) { + protected val len = in.nextChar + protected val starts = new Array[Int](len) + protected val values = new Array[AnyRef](len) + protected val internalized = new Array[NameOrString](len) + + { var i = 1 + while (i < starts.length) { + starts(i) = in.bp + i += 1 + (in.nextByte.toInt: @switch) match { + case CONSTANT_UTF8 | CONSTANT_UNICODE => + in.skip(in.nextChar) + case CONSTANT_CLASS | CONSTANT_STRING | CONSTANT_METHODTYPE => + in.skip(2) + case CONSTANT_METHODHANDLE => + in.skip(3) + case CONSTANT_FIELDREF | CONSTANT_METHODREF | CONSTANT_INTFMETHODREF + | CONSTANT_NAMEANDTYPE | CONSTANT_INTEGER | CONSTANT_FLOAT + | CONSTANT_INVOKEDYNAMIC => + in.skip(4) + case CONSTANT_LONG | CONSTANT_DOUBLE => + in.skip(8) + i += 1 + case _ => + errorBadTag(in.bp - 1) + } + } + } + + /** Return the name found at given index. */ + def getName(index: Int)(using in: DataReader): NameOrString = { + if (index <= 0 || len <= index) + errorBadIndex(index) + + values(index) match { + case name: NameOrString => name + case null => + val start = starts(index) + if (in.getByte(start).toInt != CONSTANT_UTF8) errorBadTag(start) + val len = in.getChar(start + 1).toInt + val name = new NameOrString(in.getUTF(start + 1, len + 2)) + values(index) = name + name + } + } + + /** Return the name found at given index in the constant pool, with '/' replaced by '.'. */ + def getExternalName(index: Int)(using in: DataReader): NameOrString = { + if (index <= 0 || len <= index) + errorBadIndex(index) + + if (internalized(index) == null) + internalized(index) = new NameOrString(getName(index).value.replace('/', '.')) + + internalized(index) + } + + def getClassSymbol(index: Int)(using ctx: Context, in: DataReader): Symbol + + /** Return the external name of the class info structure found at 'index'. + * Use 'getClassSymbol' if the class is sure to be a top-level class. + */ + def getClassName(index: Int)(using in: DataReader): NameOrString = { + val start = starts(index) + if (in.getByte(start).toInt != CONSTANT_CLASS) errorBadTag(start) + getExternalName(in.getChar(start + 1)) + } + + /** Return the type of a class constant entry. Since + * arrays are considered to be class types, they might + * appear as entries in 'newarray' or 'cast' opcodes. + */ + def getClassOrArrayType(index: Int)(using ctx: Context, in: DataReader): Type + + def getType(index: Int, isVarargs: Boolean = false)(using Context, DataReader): Type + + def getSuperClass(index: Int)(using Context, DataReader): Symbol = { + assert(index != 0, "attempt to parse java.lang.Object from classfile") + getClassSymbol(index) + } + + def getConstant(index: Int)(using ctx: Context, in: DataReader): Constant = { + if (index <= 0 || len <= index) errorBadIndex(index) + var value = values(index) + if (value eq null) { + val start = starts(index) + value = (in.getByte(start).toInt: @switch) match { + case CONSTANT_STRING => + Constant(getName(in.getChar(start + 1).toInt).value) + case CONSTANT_INTEGER => + Constant(in.getInt(start + 1)) + case CONSTANT_FLOAT => + Constant(in.getFloat(start + 1)) + case CONSTANT_LONG => + Constant(in.getLong(start + 1)) + case CONSTANT_DOUBLE => + Constant(in.getDouble(start + 1)) + case CONSTANT_CLASS => + getClassOrArrayType(index).typeSymbol + case _ => + errorBadTag(start) + } + values(index) = value + } + value match { + case ct: Constant => ct + case cls: Symbol => Constant(cls.typeRef) + case arr: Type => Constant(arr) + } + } + + private def getSubArray(bytes: Array[Byte]): Array[Byte] = { + val decodedLength = ByteCodecs.decode(bytes) + val arr = new Array[Byte](decodedLength) + System.arraycopy(bytes, 0, arr, 0, decodedLength) + arr + } + + def getBytes(index: Int)(using in: DataReader): Array[Byte] = { + if (index <= 0 || len <= index) errorBadIndex(index) + var value = values(index).asInstanceOf[Array[Byte]] + if (value eq null) { + val start = starts(index) + if (in.getByte(start).toInt != CONSTANT_UTF8) errorBadTag(start) + val len = in.getChar(start + 1) + val bytes = new Array[Byte](len) + in.getBytes(start + 3, bytes) + value = getSubArray(bytes) + values(index) = value + } + value + } + + def getBytes(indices: List[Int])(using in: DataReader): Array[Byte] = { + assert(!indices.isEmpty, indices) + var value = values(indices.head).asInstanceOf[Array[Byte]] + if (value eq null) { + val bytesBuffer = ArrayBuffer.empty[Byte] + for (index <- indices) { + if (index <= 0 || AbstractConstantPool.this.len <= index) errorBadIndex(index) + val start = starts(index) + if (in.getByte(start).toInt != CONSTANT_UTF8) errorBadTag(start) + val len = in.getChar(start + 1) + val buf = new Array[Byte](len) + in.getBytes(start + 3, buf) + bytesBuffer ++= buf + } + value = getSubArray(bytesBuffer.toArray) + values(indices.head) = value + } + value + } + + /** Throws an exception signaling a bad constant index. */ + protected def errorBadIndex(index: Int)(using in: DataReader) = + throw new RuntimeException("bad constant pool index: " + index + " at pos: " + in.bp) + + /** Throws an exception signaling a bad tag at given address. */ + protected def errorBadTag(start: Int)(using in: DataReader) = + throw new RuntimeException("bad constant pool tag " + in.getByte(start) + " at byte " + start) + } + + protected class NameOrString(val value: String) { + private var _name: SimpleName = null + def name: SimpleName = { + if (_name eq null) _name = termName(value) + _name + } + } } class ClassfileParser( @@ -939,26 +1113,11 @@ class ClassfileParser( } if (scan(tpnme.TASTYATTR)) { - val attrLen = in.nextInt - val bytes = in.nextBytes(attrLen) - if (attrLen == 16) { // A tasty attribute with that has only a UUID (16 bytes) implies the existence of the .tasty file - classfile.classToTasty match - case None => - report.error(em"Could not find TASTY for $classfile") - case Some(tastyFile) => - val expectedUUID = - val reader = new TastyReader(bytes, 0, 16) - new UUID(reader.readUncompressedLong(), reader.readUncompressedLong()) - val tastyUUID = - val tastyBytes: Array[Byte] = tastyFile.toByteArray - new TastyHeaderUnpickler(tastyBytes).readHeader() - if (expectedUUID != tastyUUID) - report.warning(s"$classfile is out of sync with its TASTy file. Loaded TASTy file. Try cleaning the project to fix this issue", NoSourcePosition) - return None - } - else - // Before 3.0.0 we had a mode where we could embed the TASTY bytes in the classfile. This has not been supported in any stable release. - report.error(s"Found a TASTY attribute with a length different from 16 in $classfile. This is likely a bug in the compiler. Please report.", NoSourcePosition) + val hint = + if classfile.classToTasty.isDefined then "This is likely a bug in the compiler. Please report." + else "This `.tasty` file is missing. Try cleaning the project to fix this issue." + report.error(s"Loading Scala 3 binary from $classfile. It should have been loaded from `.tasty` file. $hint", NoSourcePosition) + return None } if scan(tpnme.ScalaATTR) && !scalaUnpickleWhitelist.contains(classRoot.name) @@ -1127,78 +1286,7 @@ class ClassfileParser( private def isStatic(flags: Int) = (flags & JAVA_ACC_STATIC) != 0 private def hasAnnotation(flags: Int) = (flags & JAVA_ACC_ANNOTATION) != 0 - protected class NameOrString(val value: String) { - private var _name: SimpleName = null - def name: SimpleName = { - if (_name eq null) _name = termName(value) - _name - } - } - - def getClassSymbol(name: SimpleName)(using Context): Symbol = - if (name.endsWith("$") && (name ne nme.nothingRuntimeClass) && (name ne nme.nullRuntimeClass)) - // Null$ and Nothing$ ARE classes - requiredModule(name.dropRight(1)) - else classNameToSymbol(name) - - class ConstantPool(using in: DataReader) { - private val len = in.nextChar - private val starts = new Array[Int](len) - private val values = new Array[AnyRef](len) - private val internalized = new Array[NameOrString](len) - - { var i = 1 - while (i < starts.length) { - starts(i) = in.bp - i += 1 - (in.nextByte.toInt: @switch) match { - case CONSTANT_UTF8 | CONSTANT_UNICODE => - in.skip(in.nextChar) - case CONSTANT_CLASS | CONSTANT_STRING | CONSTANT_METHODTYPE => - in.skip(2) - case CONSTANT_METHODHANDLE => - in.skip(3) - case CONSTANT_FIELDREF | CONSTANT_METHODREF | CONSTANT_INTFMETHODREF - | CONSTANT_NAMEANDTYPE | CONSTANT_INTEGER | CONSTANT_FLOAT - | CONSTANT_INVOKEDYNAMIC => - in.skip(4) - case CONSTANT_LONG | CONSTANT_DOUBLE => - in.skip(8) - i += 1 - case _ => - errorBadTag(in.bp - 1) - } - } - } - - /** Return the name found at given index. */ - def getName(index: Int)(using in: DataReader): NameOrString = { - if (index <= 0 || len <= index) - errorBadIndex(index) - - values(index) match { - case name: NameOrString => name - case null => - val start = starts(index) - if (in.getByte(start).toInt != CONSTANT_UTF8) errorBadTag(start) - val len = in.getChar(start + 1).toInt - val name = new NameOrString(in.getUTF(start + 1, len + 2)) - values(index) = name - name - } - } - - /** Return the name found at given index in the constant pool, with '/' replaced by '.'. */ - def getExternalName(index: Int)(using in: DataReader): NameOrString = { - if (index <= 0 || len <= index) - errorBadIndex(index) - - if (internalized(index) == null) - internalized(index) = new NameOrString(getName(index).value.replace('/', '.')) - - internalized(index) - } - + class ConstantPool(using in: DataReader) extends AbstractConstantPool { def getClassSymbol(index: Int)(using ctx: Context, in: DataReader): Symbol = { if (index <= 0 || len <= index) errorBadIndex(index) var c = values(index).asInstanceOf[Symbol] @@ -1212,19 +1300,6 @@ class ClassfileParser( c } - /** Return the external name of the class info structure found at 'index'. - * Use 'getClassSymbol' if the class is sure to be a top-level class. - */ - def getClassName(index: Int)(using in: DataReader): NameOrString = { - val start = starts(index) - if (in.getByte(start).toInt != CONSTANT_CLASS) errorBadTag(start) - getExternalName(in.getChar(start + 1)) - } - - /** Return the type of a class constant entry. Since - * arrays are considered to be class types, they might - * appear as entries in 'newarray' or 'cast' opcodes. - */ def getClassOrArrayType(index: Int)(using ctx: Context, in: DataReader): Type = { if (index <= 0 || len <= index) errorBadIndex(index) val value = values(index) @@ -1252,90 +1327,12 @@ class ClassfileParser( def getType(index: Int, isVarargs: Boolean = false)(using Context, DataReader): Type = sigToType(getExternalName(index).value, isVarargs = isVarargs) + } - def getSuperClass(index: Int)(using Context, DataReader): Symbol = { - assert(index != 0, "attempt to parse java.lang.Object from classfile") - getClassSymbol(index) - } - - def getConstant(index: Int)(using ctx: Context, in: DataReader): Constant = { - if (index <= 0 || len <= index) errorBadIndex(index) - var value = values(index) - if (value eq null) { - val start = starts(index) - value = (in.getByte(start).toInt: @switch) match { - case CONSTANT_STRING => - Constant(getName(in.getChar(start + 1).toInt).value) - case CONSTANT_INTEGER => - Constant(in.getInt(start + 1)) - case CONSTANT_FLOAT => - Constant(in.getFloat(start + 1)) - case CONSTANT_LONG => - Constant(in.getLong(start + 1)) - case CONSTANT_DOUBLE => - Constant(in.getDouble(start + 1)) - case CONSTANT_CLASS => - getClassOrArrayType(index).typeSymbol - case _ => - errorBadTag(start) - } - values(index) = value - } - value match { - case ct: Constant => ct - case cls: Symbol => Constant(cls.typeRef) - case arr: Type => Constant(arr) - } - } - - private def getSubArray(bytes: Array[Byte]): Array[Byte] = { - val decodedLength = ByteCodecs.decode(bytes) - val arr = new Array[Byte](decodedLength) - System.arraycopy(bytes, 0, arr, 0, decodedLength) - arr - } - - def getBytes(index: Int)(using in: DataReader): Array[Byte] = { - if (index <= 0 || len <= index) errorBadIndex(index) - var value = values(index).asInstanceOf[Array[Byte]] - if (value eq null) { - val start = starts(index) - if (in.getByte(start).toInt != CONSTANT_UTF8) errorBadTag(start) - val len = in.getChar(start + 1) - val bytes = new Array[Byte](len) - in.getBytes(start + 3, bytes) - value = getSubArray(bytes) - values(index) = value - } - value - } - - def getBytes(indices: List[Int])(using in: DataReader): Array[Byte] = { - assert(!indices.isEmpty, indices) - var value = values(indices.head).asInstanceOf[Array[Byte]] - if (value eq null) { - val bytesBuffer = ArrayBuffer.empty[Byte] - for (index <- indices) { - if (index <= 0 || ConstantPool.this.len <= index) errorBadIndex(index) - val start = starts(index) - if (in.getByte(start).toInt != CONSTANT_UTF8) errorBadTag(start) - val len = in.getChar(start + 1) - val buf = new Array[Byte](len) - in.getBytes(start + 3, buf) - bytesBuffer ++= buf - } - value = getSubArray(bytesBuffer.toArray) - values(indices.head) = value - } - value - } - - /** Throws an exception signaling a bad constant index. */ - private def errorBadIndex(index: Int)(using in: DataReader) = - throw new RuntimeException("bad constant pool index: " + index + " at pos: " + in.bp) + def getClassSymbol(name: SimpleName)(using Context): Symbol = + if (name.endsWith("$") && (name ne nme.nothingRuntimeClass) && (name ne nme.nullRuntimeClass)) + // Null$ and Nothing$ ARE classes + requiredModule(name.dropRight(1)) + else classNameToSymbol(name) - /** Throws an exception signaling a bad tag at given address. */ - private def errorBadTag(start: Int)(using in: DataReader) = - throw new RuntimeException("bad constant pool tag " + in.getByte(start) + " at byte " + start) - } } diff --git a/compiler/src/dotty/tools/dotc/core/classfile/ClassfileTastyUUIDParser.scala b/compiler/src/dotty/tools/dotc/core/classfile/ClassfileTastyUUIDParser.scala new file mode 100644 index 000000000000..a9c91a68bb60 --- /dev/null +++ b/compiler/src/dotty/tools/dotc/core/classfile/ClassfileTastyUUIDParser.scala @@ -0,0 +1,116 @@ +package dotty.tools.dotc +package core.classfile + +import scala.language.unsafeNulls + +import dotty.tools.dotc.core.Contexts._ +import dotty.tools.dotc.core.Decorators._ +import dotty.tools.dotc.core.Names._ +import dotty.tools.dotc.core.StdNames._ +import dotty.tools.dotc.core.Symbols._ +import dotty.tools.dotc.core.Types._ +import dotty.tools.dotc.util._ +import dotty.tools.io.AbstractFile +import dotty.tools.tasty.TastyReader + +import java.io.IOException +import java.lang.Integer.toHexString +import java.util.UUID + +class ClassfileTastyUUIDParser(classfile: AbstractFile)(ictx: Context) { + + import ClassfileConstants._ + + private var pool: ConstantPool = _ // the classfile's constant pool + + def checkTastyUUID(tastyUUID: UUID)(using Context): Unit = try ctx.base.reusableDataReader.withInstance { reader => + implicit val reader2 = reader.reset(classfile) + parseHeader() + this.pool = new ConstantPool + checkTastyAttr(tastyUUID) + this.pool = null + } + catch { + case e: RuntimeException => + if (ctx.debug) e.printStackTrace() + throw new IOException( + i"""class file ${classfile.canonicalPath} is broken, reading aborted with ${e.getClass} + |${Option(e.getMessage).getOrElse("")}""") + } + + private def parseHeader()(using in: DataReader): Unit = { + val magic = in.nextInt + if (magic != JAVA_MAGIC) + throw new IOException(s"class file '${classfile}' has wrong magic number 0x${toHexString(magic)}, should be 0x${toHexString(JAVA_MAGIC)}") + val minorVersion = in.nextChar.toInt + val majorVersion = in.nextChar.toInt + if ((majorVersion < JAVA_MAJOR_VERSION) || + ((majorVersion == JAVA_MAJOR_VERSION) && + (minorVersion < JAVA_MINOR_VERSION))) + throw new IOException( + s"class file '${classfile}' has unknown version $majorVersion.$minorVersion, should be at least $JAVA_MAJOR_VERSION.$JAVA_MINOR_VERSION") + } + + private def checkTastyAttr(tastyUUID: UUID)(using ctx: Context, in: DataReader): Unit = { + in.nextChar // jflags + in.nextChar // nameIdx + skipSuperclasses() + skipMembers() // fields + skipMembers() // methods + val attrs = in.nextChar + val attrbp = in.bp + + def scan(target: TypeName): Boolean = { + in.bp = attrbp + var i = 0 + while (i < attrs && pool.getName(in.nextChar).name.toTypeName != target) { + val attrLen = in.nextInt + in.skip(attrLen) + i += 1 + } + i < attrs + } + + if (scan(tpnme.TASTYATTR)) { + val attrLen = in.nextInt + val bytes = in.nextBytes(attrLen) + if (attrLen == 16) { // A tasty attribute with that has only a UUID (16 bytes) implies the existence of the .tasty file + val expectedUUID = + val reader = new TastyReader(bytes, 0, 16) + new UUID(reader.readUncompressedLong(), reader.readUncompressedLong()) + if (expectedUUID != tastyUUID) + report.warning(s"$classfile is out of sync with its TASTy file. Loaded TASTy file. Try cleaning the project to fix this issue", NoSourcePosition) + } + else + // Before 3.0.0 we had a mode where we could embed the TASTY bytes in the classfile. This has not been supported in any stable release. + report.error(s"Found a TASTY attribute with a length different from 16 in $classfile. This is likely a bug in the compiler. Please report.", NoSourcePosition) + } + + } + + private def skipAttributes()(using in: DataReader): Unit = { + val attrCount = in.nextChar + for (i <- 0 until attrCount) { + in.skip(2); in.skip(in.nextInt) + } + } + + private def skipMembers()(using in: DataReader): Unit = { + val memberCount = in.nextChar + for (i <- 0 until memberCount) { + in.skip(6); skipAttributes() + } + } + + private def skipSuperclasses()(using in: DataReader): Unit = { + in.skip(2) // superclass + val ifaces = in.nextChar + in.skip(2 * ifaces) + } + + class ConstantPool(using in: DataReader) extends ClassfileParser.AbstractConstantPool { + def getClassOrArrayType(index: Int)(using ctx: Context, in: DataReader): Type = throw new UnsupportedOperationException + def getClassSymbol(index: Int)(using ctx: Context, in: DataReader): Symbol = throw new UnsupportedOperationException + def getType(index: Int, isVarargs: Boolean)(using x$3: Context, x$4: DataReader): Type = throw new UnsupportedOperationException + } +} diff --git a/compiler/src/dotty/tools/io/AbstractFile.scala b/compiler/src/dotty/tools/io/AbstractFile.scala index fd9e2281181b..09779953fc76 100644 --- a/compiler/src/dotty/tools/io/AbstractFile.scala +++ b/compiler/src/dotty/tools/io/AbstractFile.scala @@ -253,7 +253,7 @@ abstract class AbstractFile extends Iterable[AbstractFile] { /** Returns the sibling abstract file in the parent of this abstract file or directory. * If there is no such file, returns `null`. */ - def resolveSibling(name: String): AbstractFile = + def resolveSibling(name: String): AbstractFile | Null = container.lookupName(name, directory = false) private def fileOrSubdirectoryNamed(name: String, isDir: Boolean): AbstractFile =