Skip to content

Commit

Permalink
Merge pull request #616 from SMI/check-package-clobber
Browse files Browse the repository at this point in the history
Check for file clobbering during package script
  • Loading branch information
rkm authored Feb 22, 2021
2 parents cc7da4d + e027cf1 commit 5eb2a61
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 41 deletions.
6 changes: 6 additions & 0 deletions .azure-pipelines/jobs/package.tmpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ jobs:
- template: ../steps/set-git-tag-variable.tmpl.yml
- ${{ if eq( parameters.os, 'win' ) }}:
- template: ../steps/win-nuget-fixup.tmpl.yml
- ${{ if eq( parameters.os, 'linux' ) }}:
- task: Bash@3
displayName: Install exiftool
inputs:
targetType: inline
script: sudo apt-get install -y exiftool
- task: Bash@3
displayName: Build artefacts
inputs:
Expand Down
276 changes: 236 additions & 40 deletions .azure-pipelines/scripts/buildArtefacts.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,41 @@
#!/usr/bin/env python3
"""
Builds compiled packages for the C# and Java services in this repo.
The build output for the C# services looks like:
dist/
v1.2.3/
# Temporary during build. Contains all C# services and DLLs separated by csproj
smi-services-build-tmp/
DicomTagReader/
...
# Temporary during build. Contains all C# services and DLLs merged into one dir
smi-services-{tag}-{platform}-x64/
...
# The final output archive
smi-services-{tag}-{platform}-x64.tgz
The files in the merged dir are checked for accidental overwriting, which can occur when
publishing a solution of multiple projects into a single directory. See
https://github.com/dotnet/sdk/issues/9984.
NOTE: Requires exiftool when running on Linux
"""
import argparse
import concurrent
import filecmp
import functools
import glob
import hashlib
import re
import shutil
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from subprocess import CalledProcessError
from typing import Dict
from typing import Optional
from typing import Sequence
Expand All @@ -16,6 +46,8 @@
_WINDOWS = "win"
_PLATFORMS = (_LINUX, _WINDOWS)
_STR_LIKE = Union[str, Path]
_ASSEMBLY_NAME_RE = re.compile(".*AssemblyName>(.*)<", re.IGNORECASE)
_IS_PUBLISHABLE_RE = re.compile(".*IsPublishable>false<", re.IGNORECASE)


def _run(cmd: Sequence[_STR_LIKE]) -> None:
Expand All @@ -27,7 +59,7 @@ def _windows_bash_fixup(platform: str, cmd: Sequence[_STR_LIKE]) -> Sequence[_ST
return cmd if platform != _WINDOWS else ("powershell", "bash", *cmd)


def _build_java_packages(dist_dir: Path, tag: str) -> None:
def _build_java_packages(dist_tag_dir: Path, tag: str) -> None:

# Build Java microservices

Expand All @@ -51,20 +83,21 @@ def _build_java_packages(dist_dir: Path, tag: str) -> None:
for zip_path in zips:
shutil.copyfile(
zip_path,
dist_dir / f"{zip_path.name.split('-')[0]}-{tag}.zip",
dist_tag_dir / f"{zip_path.name.split('-')[0]}-{tag}.zip",
)

# Build nerd

cmd = (
"mvn", "-ntp",
"mvn",
"-ntp",
"-f", "./src/microservices/uk.ac.dundee.hic.nerd",
"-DskipTests",
"package",
)
_run(cmd)

nerd_jar, = {
(nerd_jar,) = {
Path(x)
for x in glob.glob(
"./src/microservices/uk.ac.dundee.hic.nerd/target/nerd-*.jar",
Expand All @@ -73,10 +106,154 @@ def _build_java_packages(dist_dir: Path, tag: str) -> None:
}
shutil.copyfile(
nerd_jar,
dist_dir / f"smi-nerd-{tag}.jar",
dist_tag_dir / f"smi-nerd-{tag}.jar",
)


def _publish_csproj(build_dir: Path, platform: str, csproj_path: Path) -> None:
    """Publish one C# project into its own sub-directory of ``build_dir``.

    The project file is scanned line by line. A project containing an
    ``IsPublishable`` element set to ``false`` is skipped. Otherwise the
    first ``AssemblyName`` found names the output directory
    (``build_dir / <AssemblyName>``) passed to ``dotnet publish``.

    Args:
        build_dir: Existing directory that receives one sub-directory per
            published project.
        platform: Platform prefix of the dotnet runtime identifier; the
            runtime passed to dotnet is ``f"{platform}-x64"``.
        csproj_path: Path of the ``.csproj`` file to publish.

    Raises:
        AssertionError: If no ``AssemblyName`` is found in the project file.
        FileExistsError: If the output directory for this assembly name
            already exists (raised by ``Path.mkdir``).
        CalledProcessError: If ``dotnet publish`` exits with a non-zero
            status.
    """
    assembly_name: Optional[str] = None
    with open(csproj_path) as f:
        for line in f:
            # Keep scanning after the AssemblyName has been found so that an
            # IsPublishable element appearing later in the file is honoured.
            if _IS_PUBLISHABLE_RE.match(line):
                print(f"{csproj_path} not publishable")
                return None

            if assembly_name is None:
                aname_match = _ASSEMBLY_NAME_RE.match(line)
                if aname_match:
                    assembly_name = aname_match.group(1)

    if not assembly_name:
        raise AssertionError(f"Couldn't find AssemblyName in {csproj_path}")

    publish_dir = build_dir / assembly_name
    publish_dir.mkdir()
    cmd = (
        "dotnet", "publish",
        "-p:Platform=x64",
        "--configuration", "Release",
        "-p:PublishTrimmed=false",
        "--runtime", f"{platform}-x64",
        "--output", publish_dir,
        "--nologo",
        csproj_path,
    )

    proc = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # TODO(rkm 2021-02-20) Runs in parallel so need to capture output and log properly
    stdout = proc.stdout.decode().strip()
    stderr = proc.stderr.decode().strip()
    if stdout or stderr:
        print(f"=== {csproj_path} ===")
        print(f"STDOUT\n{stdout}\n")
        print(f"STDERR\n{stderr}\n")

    if proc.returncode:
        # CalledProcessError requires the return code and the command; the
        # captured streams are attached for callers that want to inspect them.
        raise CalledProcessError(
            proc.returncode,
            cmd,
            output=proc.stdout,
            stderr=proc.stderr,
        )

    return None


def _md5sum(file_path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is read in fixed-size chunks so that large artefacts are never
    loaded into memory in one go.
    """
    digest = hashlib.md5()
    with open(file_path, mode="rb") as f:
        # 64 KiB per read: the digest is identical for any chunk size, but
        # tiny reads cost one Python-level call per handful of bytes.
        for buf in iter(functools.partial(f.read, 65536), b""):
            digest.update(buf)
    return digest.hexdigest()


def _get_assembly_version(file_path: Path) -> str:
    """Return the AssemblyVersion that exiftool reports for ``file_path``.

    Runs ``/usr/bin/exiftool -S -AssemblyVersion <file_path>`` and returns
    the second whitespace-separated field of its output.

    Returns:
        The version string, or ``"<Unknown>"`` when exiftool reports a
        "File format error" or prints no usable AssemblyVersion line.

    Raises:
        CalledProcessError: If exiftool fails for any reason other than a
            "File format error".
    """
    try:
        proc = subprocess.run(
            (
                "/usr/bin/exiftool",
                "-S",
                "-AssemblyVersion",
                file_path,
            ),
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except CalledProcessError as exc:
        if "File format error" in exc.stderr.decode():
            return "<Unknown>"
        raise

    # Expected output looks like "AssemblyVersion: 1.2.3.4". exiftool can
    # exit successfully with no such line (presumably when the file has no
    # AssemblyVersion tag), so guard against a missing second field rather
    # than failing with IndexError.
    fields = proc.stdout.decode().strip().split()
    if len(fields) < 2:
        return "<Unknown>"
    return fields[1]


def _merge_files(build_dir: Path, base_output_dir: Path, platform: str) -> bool:
    """Merge every per-project publish directory into ``base_output_dir``.

    Each immediate sub-directory of ``build_dir`` is copied recursively into
    ``base_output_dir``. Whenever a file would overwrite an already-merged
    file whose content differs, the destination is recorded as clobbered.
    After merging, a report is printed for each clobbered destination: the
    MD5 of every source that was copied onto it and, except on Windows, the
    assembly version of each distinct ``.dll`` source.

    Args:
        build_dir: Directory containing one sub-directory per published
            project. Plain files directly inside it are ignored.
        base_output_dir: Directory to create and merge into; it must not
            already exist.
        platform: Target platform; assembly versions are not looked up when
            it equals ``_WINDOWS``.

    Returns:
        True if at least one file was clobbered, otherwise False.
    """
    base_output_dir.mkdir()
    # Destination path -> every source file that was copied onto it.
    files = {}
    # Destination paths that were overwritten with differing content.
    clobbered = set()

    def _check_clobber_and_copy(
        src: Path,
        output_dir: Optional[Path] = None,
    ) -> None:
        """Copy ``src`` (file or directory tree) below ``output_dir``."""
        output_dir = output_dir or base_output_dir

        if src.is_dir():
            sub_dir = output_dir / src.name
            sub_dir.mkdir(exist_ok=True)
            for child in src.iterdir():
                _check_clobber_and_copy(child, sub_dir)
            return None

        existing = output_dir / src.name
        if existing.is_file() and not filecmp.cmp(
            existing,
            src,
            # NOTE(rkm 2021-02-20) Don't just compare on os.stat
            shallow=False,
        ):
            clobbered.add(existing)

        files.setdefault(existing, []).append(src)

        shutil.copy2(src, output_dir)
        return None

    # Sorted so that the merge order, and therefore the report, is
    # deterministic across runs and file systems.
    for csproj_dir in sorted(d for d in build_dir.iterdir() if d.is_dir()):
        for file_or_dir_path in csproj_dir.iterdir():
            _check_clobber_and_copy(file_or_dir_path)

    for file_path in sorted(clobbered):
        print(f"=== Clobbered {file_path.name} ===")
        uniq = {}
        for f in files[file_path]:
            md5 = _md5sum(f)
            print(f"{md5}\t{f}")
            if f.suffix == ".dll":
                uniq[md5] = f

        # TODO(rkm 2021-02-22) Find some way to get AssemblyVersions on Windows
        if platform == _WINDOWS:
            continue

        if uniq:
            print("Versions:")
            for md5, f in uniq.items():
                dll_version = _get_assembly_version(f)
                print(f" {dll_version} ({md5})")
        print()

    # A ``for ... else`` here would run its else branch on every call (the
    # loop never breaks) and so always report "no clobbering"; derive the
    # result from the recorded set instead.
    return bool(clobbered)


def main(argv: Optional[Sequence[str]] = None) -> int:

parser = argparse.ArgumentParser()
Expand All @@ -91,13 +268,12 @@ def main(argv: Optional[Sequence[str]] = None) -> int:

tag = args.tag
platform = args.platform
dist_dir = Path("dist", tag)
smi_services_output_dir = f"smi-services-{tag}-{platform}-x64"
dist_tag_dir = Path("dist", tag)

if dist_dir.is_dir():
print(f"Error: {dist_dir} already exists", file=sys.stderr)
if dist_tag_dir.is_dir():
print(f"Error: {dist_tag_dir} already exists", file=sys.stderr)
return 1
dist_dir.mkdir(parents=True)
dist_tag_dir.mkdir(parents=True)

cmd: Sequence[_STR_LIKE]

Expand All @@ -109,54 +285,74 @@ def main(argv: Optional[Sequence[str]] = None) -> int:

# Publish dotnet packages

cmd = (
"dotnet", "publish",
"-p:Platform=x64",
"--configuration", "Release",
"-p:PublishTrimmed=false",
"--runtime", f"{platform}-x64",
"--output", dist_dir / smi_services_output_dir,
"-v", "quiet", "--nologo",
tmp_build_dir = dist_tag_dir / "smi-services-build-tmp"
tmp_build_dir.mkdir()
publish_csproj = functools.partial(_publish_csproj, tmp_build_dir, platform)
csproj_paths = {Path(x) for x in glob.glob("src/**/*.csproj", recursive=True)}
failed_builds = []

# NOTE(rkm 2021-02-20) Might get a bit of a benefit here - runs on Standard_DS2_v2 (2 vCPU)
with ThreadPoolExecutor() as executor:
build_results = {executor.submit(publish_csproj, p): p for p in csproj_paths}
for future in concurrent.futures.as_completed(build_results):
csproj_path = build_results[future]
try:
future.result()
except Exception as exc:
print(f"{csproj_path} generated an exception: {exc}")
failed_builds.append(csproj_path)

if failed_builds:
failed = "\n".join([x.name for x in failed_builds])
print(f"At least one build failed:\n{failed}")
return 1

smi_services_output_dir = f"smi-services-{tag}-{platform}-x64"
did_clobber = _merge_files(
tmp_build_dir,
dist_tag_dir / smi_services_output_dir,
platform,
)
_run(cmd)
if did_clobber:
return 1

shutil.rmtree(tmp_build_dir)

if platform == _LINUX:
cmd = (
"tar", "-c",
# TODO(rkm 2020-12-23) pigz
"-z", # "-I", shlex.quote("pigz -9"),
"-f", dist_dir / f"{smi_services_output_dir}.tgz",
dist_dir / smi_services_output_dir,
"tar",
"-C", dist_tag_dir,
"-czf",
dist_tag_dir / f"{smi_services_output_dir}.tgz",
smi_services_output_dir,
)
elif platform == _WINDOWS:
# NOTE(rkm 2020-12-23) If building Windows _from_ Linux, this needs to be 7za
cmd = (
"7z", "a",
"7z",
"a",
f"-w{dist_tag_dir}",
"-tzip",
"-mx9",
dist_dir / f"{smi_services_output_dir}.zip",
"-r", dist_dir / smi_services_output_dir,
"-r",
dist_tag_dir / f"{smi_services_output_dir}.zip",
smi_services_output_dir,
)
else:
print(f"Error: No case for platform {platform}", file=sys.stderr)
return 1

_run(cmd)
shutil.rmtree(dist_dir / smi_services_output_dir)
shutil.rmtree(dist_tag_dir / smi_services_output_dir)

if platform == _LINUX:
_build_java_packages(dist_dir, tag)

# Create checksum file
# NOTE(rkm 2020-12-23) No easy cross-platform md5sum tool, so have to use hashlib

hashes: Dict[str, str] = {}
for artefact_path in dist_dir.iterdir():
with open(artefact_path, "rb") as artefact_file:
hashes[artefact_path.name] = hashlib.md5(artefact_file.read()).hexdigest()
with open(dist_dir / f"MD5SUMS-{platform}.txt", "w") as md5_file:
for filename in sorted(hashes):
md5_file.write(f"{hashes[filename]} {filename}\n")
_build_java_packages(dist_tag_dir, tag)

# Create checksum files
file_checksums = {x.name: _md5sum(x) for x in dist_tag_dir.iterdir()}
with open(dist_tag_dir / f"MD5SUMS-{platform}.txt", "w") as md5_file:
for file_name, md5sum in file_checksums.items():
md5_file.write(f"{md5sum} {file_name}\n")

return 0

Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Each entry in the changelog should use the following template:

## [Unreleased]

-
- [#616](https://github.com/SMI/SmiServices/pull/616) by `rkm`. Check for clobbered files during package build

## [1.15.1] 2021-02-17

Expand Down

0 comments on commit 5eb2a61

Please sign in to comment.