Skip to content

Commit

Permalink
support ucas/utoc assets for UE5 (not tested yet)
Browse files Browse the repository at this point in the history
  • Loading branch information
matyalatte committed Apr 24, 2024
1 parent 603f3ff commit 7b7f563
Show file tree
Hide file tree
Showing 11 changed files with 1,161 additions and 433 deletions.
1 change: 1 addition & 0 deletions docs/changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
- Supported ucas/utoc assets for UE5
- Added support for UE5.4.
- Fixed typos.

Expand Down
51 changes: 45 additions & 6 deletions src/unreal/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class ArchiveBase:
io: IOBase
is_reading = False
is_writing = False
is_ucas = False

def __init__(self, io: IOBase, endian="little", context: dict = {}):
self.io = io
Expand Down Expand Up @@ -65,13 +66,35 @@ def write(self, obj):
def close(self):
self.io.close()

def raise_error(self, msg="Parse failed. Make sure you specified UE version correctly."):
    """Raise a RuntimeError, appending the asset's file name when known.

    Args:
        msg: Base error message.

    Raises:
        RuntimeError: always; the message gains " (<file_name>)" when
            this archive has a `uasset` attached.
    """
    if hasattr(self, "uasset"):
        msg = f"{msg} ({self.uasset.file_name})"
    raise RuntimeError(msg)

def check(self, actual, expected, msg="Parse failed. Make sure you specified UE version correctly."):
    """Assert that a parsed value equals the expected one.

    On mismatch, prints the current archive offset and both values for
    debugging, then raises through raise_error so the asset's file name
    is appended to the message.

    Fix: the block contained both the old `raise RuntimeError(msg)` and
    the new `self.raise_error(msg)`; the first made the second
    unreachable and skipped the file-name suffix. Keep only the
    delegated call.

    Args:
        actual: Value read from the archive.
        expected: Value it must equal.
        msg: Error message used on mismatch.

    Raises:
        RuntimeError: via raise_error, when actual != expected.
    """
    if actual == expected:
        return
    print(f"offset: {self.tell()}")
    print(f"actual: {actual}")
    print(f"expected: {expected}")
    self.raise_error(msg)

def check_buffer_size(self, size):
    """Ensure at least `size` bytes remain readable from the current offset.

    Args:
        size: Number of bytes the caller is about to read.

    Raises:
        RuntimeError: when fewer than `size` bytes remain.
    """
    offset = self.tell()
    if offset + size <= self.size:
        return
    raise RuntimeError(
        "There is no buffer that has specified size."
        f" (Offset: {offset}, Size: {size})"
    )

def update_with_current_offset(self, obj, attr_name):
    """Synchronize obj.<attr_name> with the archive's current offset.

    When reading, verifies that the serialized offset stored on `obj`
    matches the archive position (via self.check). When writing, stores
    the current position into the attribute instead.

    Args:
        obj: Object carrying the offset attribute.
        attr_name: Name of the offset attribute on `obj`.
    """
    offset = self.tell()
    if not self.is_reading:
        setattr(obj, attr_name, offset)
        return
    self.check(getattr(obj, attr_name), offset)


class ArchiveRead(ArchiveBase):
Expand Down Expand Up @@ -121,11 +144,7 @@ class Buffer(Bytes):
@staticmethod
def read(ar: ArchiveBase) -> bytes:
size = ar.args[0]
if ar.tell() + size > ar.size:
raise RuntimeError(
"There is no buffer that has specified size."
f" (Offset: {ar.tell()}, Size: {size})"
)
ar.check_buffer_size(size)
return ar.read(size)


Expand Down Expand Up @@ -234,6 +253,26 @@ def write(ar: ArchiveBase, val: str):
ar.write(str_byte + b"\x00" * (1 + utf16))


class StringWithLen:
    """(De)serializer for fixed-length strings without a null terminator.

    The archive supplies ar.args = (num, utf16): the character count and
    a flag selecting UTF-16-LE (truthy) or ASCII (falsy) encoding.
    """

    @staticmethod
    def get_args(ar: "ArchiveBase"):
        """Return (num, utf16, codec name) taken from ar.args."""
        num, utf16 = ar.args[0], ar.args[1]
        return num, utf16, "utf-16-le" if utf16 else "ascii"

    @staticmethod
    def read(ar: "ArchiveBase") -> str:
        """Read num characters (num * 2 bytes when UTF-16) and decode them."""
        num, utf16, codec = StringWithLen.get_args(ar)
        return ar.read(num * (1 + utf16)).decode(codec)

    @staticmethod
    def write(ar: "ArchiveBase", val: str):
        """Encode `val` with the archive's codec and write it (no terminator)."""
        _, _, codec = StringWithLen.get_args(ar)
        ar.write(val.encode(codec))


class SerializableBase:
def serialize(self, ar: ArchiveBase): # pragma: no cover
pass
Expand Down
167 changes: 167 additions & 0 deletions src/unreal/city_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Converted UE4's codes (CityHash.cpp, etc.) to python.
# https://github.com/EpicGames/UnrealEngine

# Bit masks for emulating fixed-width uint64/uint32 arithmetic with
# python's arbitrary-precision integers.
MASK_64 = 0xFFFFFFFFFFFFFFFF
MASK_32 = 0xFFFFFFFF

# Some primes between 2^63 and 2^64 for various uses.
k0 = 0xc3a5c85c97cb3127
k1 = 0xb492b66fbe98f273
k2 = 0x9ae16a3b2f90404f


def to_uint(binary: bytes) -> int:
    """Interpret a byte string as a little-endian unsigned integer."""
    return int.from_bytes(binary, byteorder="little")


def fetch64(binary: bytes) -> int:
    """Load the first 8 bytes as a uint64 (like dereferencing a uint64*)."""
    return int.from_bytes(binary[:8], byteorder="little")


def fetch32(binary: bytes) -> int:
    """Load the first 4 bytes as a uint32 (like dereferencing a uint32*)."""
    return int.from_bytes(binary[:4], byteorder="little")


def bswap_64(i: int) -> int:
    """Reverse the byte order of a 64-bit value."""
    return int.from_bytes((i & MASK_64).to_bytes(8, byteorder="little"), "big")


def rotate(val: int, shift: int) -> int:
    """Rotate a 64-bit value right by `shift` bits (0 <= shift < 64)."""
    val &= MASK_64
    if shift == 0:
        return val
    return ((val >> shift) | (val << (64 - shift))) & MASK_64


def shift_mix(val: int) -> int:
    """XOR a 64-bit value with itself shifted right 47 bits (CityHash ShiftMix)."""
    val &= MASK_64
    return (val ^ (val >> 47)) & MASK_64


def hash_len_16(u: int, v: int, mul: int) -> int:
    """Murmur-inspired 128-to-64-bit mixer (CityHash HashLen16)."""
    a = ((u ^ v) * mul) & MASK_64
    a ^= a >> 47
    b = ((a ^ v) * mul) & MASK_64
    b ^= b >> 47
    return (b * mul) & MASK_64


def hash_len_16_2(u: int, v: int) -> int:
    """HashLen16 with the default Murmur multiplier (CityHash Hash128to64)."""
    return hash_len_16(u, v, 0x9ddfea08eb382d69)


def hash_len_0to16(binary: bytes) -> int:
    """CityHash64 for inputs of 0-16 bytes (CityHash HashLen0to16).

    Args:
        binary: Input of length 0..16.

    Returns:
        A 64-bit hash value; k2 for the empty input.
    """
    length = len(binary)
    if length >= 8:
        mul = k2 + length * 2
        a = fetch64(binary) + k2
        b = fetch64(binary[-8:])
        c = rotate(b, 37) * mul + a
        d = (rotate(a, 25) + b) * mul
        return hash_len_16(c, d, mul)
    if length >= 4:
        mul = k2 + length * 2
        a = fetch32(binary)
        return hash_len_16(length + (a << 3), fetch32(binary[-4:]), mul)
    if length > 0:
        a = binary[0]
        b = binary[length >> 1]
        # Fixed: the reference reads the LAST BYTE (uint8 c = s[len - 1]).
        # The previous `c = binary[:-1]` produced a bytes object, so
        # `c << 2` below raised TypeError for 1-3 byte inputs.
        c = binary[-1]
        y = (a + (b << 8)) & MASK_32
        z = (length + (c << 2)) & MASK_32
        return (shift_mix(y * k2 ^ z * k0) * k2) & MASK_64
    return k2


def hash_len_17to32(binary: bytes) -> int:
    """CityHash64 for inputs of 17-32 bytes (CityHash HashLen17to32)."""
    length = len(binary)
    mul = k2 + length * 2
    a = fetch64(binary) * k1
    b = fetch64(binary[8:])
    # Negative slices mirror the reference's end-relative pointers
    # (s + len - 8, s + len - 16).
    c = fetch64(binary[-8:]) * mul
    d = fetch64(binary[-16:]) * k2
    return (hash_len_16(
        rotate(a + b, 43) + rotate(c, 30) + d,
        a + rotate(b + k2, 18) + c,
        mul)
    ) & MASK_64


def hash_len_33to64(binary: bytes) -> int:
    """CityHash64 for inputs of 33-64 bytes (CityHash HashLen33to64)."""
    length = len(binary)
    mul = k2 + length * 2
    a = fetch64(binary) * k2
    b = fetch64(binary[8:])
    # Negative slices correspond to the reference's end-relative reads
    # (s + len - 24, s + len - 32, s + len - 8, s + len - 16).
    c = fetch64(binary[-24:])
    d = fetch64(binary[-32:])
    e = fetch64(binary[16:]) * k2
    f = fetch64(binary[24:]) * 9
    g = fetch64(binary[-8:])
    h = fetch64(binary[-16:]) * mul
    u = rotate(a + g, 43) + (rotate(b, 30) + c) * 9
    v = ((a + g) ^ d) + f + 1
    w = bswap_64((u + v) * mul) + h
    x = rotate(e + f, 42) + c
    y = (bswap_64((v + w) * mul) + g) * mul
    z = e + f + c
    # a and b are reused as scratch registers below, as in the reference.
    a = (bswap_64((x + z) * mul + y) + b)
    b = shift_mix((z + a) * mul + d + h) * mul
    return (b + x) & MASK_64


def weak_hash_len32_with_seeds(binary: bytes, a: int, b: int) -> tuple:
    """Hash the first 32 bytes of `binary` with seeds a and b.

    Returns a (lo, hi) pair of 64-bit ints. (Annotation corrected: the
    helper returns a 2-tuple, not an int.)
    """
    return weak_hash_len32_with_seeds2(
        fetch64(binary),
        fetch64(binary[8:]),
        fetch64(binary[16:]),
        fetch64(binary[24:]),
        a,
        b)


def weak_hash_len32_with_seeds2(w: int, x: int, y: int, z: int, a: int, b: int) -> tuple:
    """Mix four 64-bit lanes with seeds a, b (CityHash WeakHashLen32WithSeeds).

    Fix: the return annotation said `int`, but the function returns a
    (lo, hi) 2-tuple of 64-bit ints.
    """
    a += w
    b = rotate(b + a + z, 21)
    c = a
    a += x
    a += y
    # NOTE(review): the reference adds rotate(a, 44) to c rather than b,
    # but only the sum b + c is returned below, so the result is identical.
    b += rotate(a, 44)
    return (a + z) & MASK_64, (b + c) & MASK_64


def city_hash_64(binary: bytes) -> int:
    """Compute the 64-bit CityHash of `binary` (CityHash64)."""
    length = len(binary)
    # Short inputs dispatch to specialized hashers.
    if length <= 32:
        if length <= 16:
            return hash_len_0to16(binary)
        else:
            return hash_len_17to32(binary)
    elif length <= 64:
        return hash_len_33to64(binary)

    # For len > 64: seed the 56-byte internal state (v, w, x, y, z)
    # from the LAST 64 bytes, then run one round per 64-byte chunk.
    x = fetch64(binary[-40:])
    y = fetch64(binary[-16:]) + fetch64(binary[-56:])
    z = hash_len_16_2(fetch64(binary[-48:]) + length, fetch64(binary[-24:]))
    v_lo, v_hi = weak_hash_len32_with_seeds(binary[-64:], length, z)
    w_lo, w_hi = weak_hash_len32_with_seeds(binary[-32:], y + k1, x)
    x = x * k1 + fetch64(binary)
    # Round the length down past the last full 64-byte boundary
    # (mirrors the reference's `len = (len - 1) & ~63`).
    length = (length - 1) & (~63)
    binary = binary[:length]

    # Consume 64 bytes per iteration; slicing stands in for pointer
    # advancement in the C++ code.
    while (len(binary) > 0):
        x = rotate(x + y + v_lo + fetch64(binary[8:]), 37) * k1
        y = rotate(y + v_hi + fetch64(binary[48:]), 42) * k1
        x ^= w_hi
        y += v_lo + fetch64(binary[40:])
        z = rotate(z + w_lo, 33) * k1
        v_lo, v_hi = weak_hash_len32_with_seeds(binary, v_hi * k1, x + w_lo)
        w_lo, w_hi = weak_hash_len32_with_seeds(binary[32:], z + w_hi, y + fetch64(binary[16:]))
        z, x = x, z
        binary = binary[64:]
    return hash_len_16_2(hash_len_16_2(v_lo, w_lo) + shift_mix(y) * k1 + z,
                         hash_len_16_2(v_hi, w_hi) + x)
2 changes: 1 addition & 1 deletion src/unreal/crc.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def memcrc(string):
return ~crc & 0xFFFFFFFF


def generate_hash(string):
def strcrc(string):
"""Generate hash from a string.
Args:
Expand Down
43 changes: 43 additions & 0 deletions src/unreal/data_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,46 @@ def print(self, padding=2):
print(pad + f" data size: {self.data_size}")
print(pad + f" outer index: {self.outer_index}")
print(pad + f" legacy bulk data flags: {self.bulk_flags}")


class BulkDataMapEntry(SerializableBase, DataResourceBase):
    """data resource for ucas assets. (FBulkDataMapEntry)

    Notes:
        UnrealEngine/Engine/Source/Runtime/CoreUObject/Public/Serialization/AsyncLoading2.h
        The latest UE version will write the meta data in .uasset.
    """
    def __init__(self):
        super().__init__()
        # Raw flag bits for this entry.
        self.flags = 0
        # Offset of duplicated serial data; presumably -1 means "no
        # duplicate" — confirm against FBulkDataMapEntry in AsyncLoading2.h.
        self.duplicated_offset = -1

    def serialize(self, ar: ArchiveBase):
        """Read or write this entry through the archive `ar`."""
        if ar.is_writing:
            if not ar.valid:
                # NOTE(review): presumably repacks self.bulk_flags before
                # writing when the archive isn't marked valid — confirm
                # against update_bulk_flags in the base class.
                self.update_bulk_flags(ar)

        # Fixed 32-byte layout: 3 x int64 + 2 x uint32 (see get_struct_size).
        ar << (Int64, self, "offset")
        ar << (Int64, self, "duplicated_offset")
        ar << (Int64, self, "data_size")
        ar << (Uint32, self, "bulk_flags")
        # NOTE(review): `==` looks like the archive's check/write-constant
        # operator for the 4-byte pad — confirm in archive.py.
        ar == (Uint32, 0, "pad")

        if ar.is_reading:
            self.unpack_bulk_flags(ar)

    def update(self, data_size: int, has_uexp_bulk: bool):
        """Update size/flags via the base class; ucas entries always use 64-bit sizes."""
        super().update(data_size, has_uexp_bulk)
        self.has_64bit_size = True

    def print(self, padding=2):
        """Print this entry's fields for debugging, left-padded by `padding` spaces."""
        pad = " " * padding
        print(pad + "DataResource")
        print(pad + f" serial offset: {self.offset}")
        print(pad + f" duplicated serial offset: {self.duplicated_offset}")
        print(pad + f" data size: {self.data_size}")
        print(pad + f" flags: {self.bulk_flags}")

    @staticmethod
    def get_struct_size(ar: ArchiveBase) -> int:
        # Serialized size in bytes: 3 * 8 (int64) + 2 * 4 (uint32) = 32.
        return 32
Loading

0 comments on commit 7b7f563

Please sign in to comment.