Skip to content

Commit

Permalink
[Fixes #12226] Directory assets (#12337)
Browse files Browse the repository at this point in the history
[Fixes #12226] Directory assets
---------
Co-authored-by: etj <[email protected]>
  • Loading branch information
mattiagiupponi authored and giohappy committed Jul 3, 2024
1 parent 216e3dd commit 618a936
Show file tree
Hide file tree
Showing 14 changed files with 286 additions and 75 deletions.
55 changes: 55 additions & 0 deletions geonode/assets/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import json
import logging
from django.db import models
from django.forms import widgets
from django.contrib import admin

from geonode.assets.local import LocalAssetHandler
from geonode.assets.models import LocalAsset
from geonode.base.models import Link

logger = logging.getLogger(__name__)


class PrettyJSONWidget(widgets.Textarea):
    """Textarea widget that shows JSON content pretty-printed.

    Besides re-indenting the JSON text, the widget resizes itself (within
    fixed bounds) so that the formatted content fits.
    """

    def format_value(self, value):
        """Return ``value`` re-serialized as indented JSON with sorted keys.

        When parsing or formatting fails for any reason, a warning is logged
        and the rendering is delegated to the parent widget.
        """
        try:
            value = json.dumps(json.loads(value), indent=2, sort_keys=True)
            lines = value.split("\n")
            widest = max(len(line) for line in lines)
            # Fit the textarea to the content: 10 to 30 rows, 40 to 120 columns.
            self.attrs["rows"] = min(max(len(lines) + 2, 10), 30)
            self.attrs["cols"] = min(max(widest + 2, 40), 120)
            return value
        except Exception as e:
            logger.warning("Error while formatting JSON: {}".format(e))
            return super().format_value(value)


@admin.register(LocalAsset)
class LocalAssetAdmin(admin.ModelAdmin):
    """Admin configuration for ``LocalAsset`` objects."""

    model = LocalAsset

    list_display = ("id", "title", "type", "owner", "created_formatted", "managed", "links", "link0")
    list_display_links = ("id", "title")

    # JSON model fields are edited through the pretty-printing textarea.
    formfield_overrides = {models.JSONField: {"widget": PrettyJSONWidget}}

    def created_formatted(self, obj):
        """Return the asset creation time as ``YYYY-MM-DD HH:MM:SS``."""
        return obj.created.strftime("%Y-%m-%d %H:%M:%S")

    def links(self, obj):
        """Return how many ``Link`` objects reference the asset."""
        return Link.objects.filter(asset=obj).count()

    def link0(self, obj):
        """Describe the first ``Link`` referencing the asset, or return ``None`` when there is none."""
        first_link = Link.objects.filter(asset=obj).first()
        if not first_link:
            return None
        return f"{first_link.link_type} {first_link.extension}: {first_link.name}"

    def managed(self, obj) -> bool:
        """Report whether the first location of the asset is a managed file.

        The check is delegated to ``LocalAssetHandler._is_file_managed``; if it
        fails (e.g. the asset has no location) the error is logged and ``None``
        is returned.
        """
        try:
            first_location = obj.location[0]
            return LocalAssetHandler._is_file_managed(first_location)
        except Exception as e:
            logger.error(f"Bad location for asset obj: {e}")
            return None

    # Flag the column as a boolean one for the admin list display.
    managed.boolean = True
14 changes: 7 additions & 7 deletions geonode/assets/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
logger = logging.getLogger(__name__)


class AssetDownloadHandlerInterface:
    """Interface for the objects that build the download response of an asset."""

    def create_response(self, asset: Asset, attachment: bool = False, basename=None, path=None) -> HttpResponse:
        """Build the HTTP response for ``asset``; concrete handlers must override this.

        :raise NotImplementedError: always, in this base interface
        """
        raise NotImplementedError()


class AssetHandlerInterface:

def handled_asset_class(self):
Expand All @@ -32,19 +38,13 @@ def clone(self, asset: Asset) -> Asset:
def create_link_url(self, asset: Asset) -> str:
raise NotImplementedError()

def get_download_handler(self, asset: Asset):
def get_download_handler(self, asset: Asset) -> AssetDownloadHandlerInterface:
raise NotImplementedError()

def get_storage_manager(self, asset):
raise NotImplementedError()


class AssetDownloadHandlerInterface:

def create_response(self, asset: Asset, attachment: bool = False, basename=None) -> HttpResponse:
raise NotImplementedError()


class AssetHandlerRegistry:
_registry = {}
_default_handler = None
Expand Down
158 changes: 118 additions & 40 deletions geonode/assets/local.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import logging
import os
import shutil

from django.conf import settings
from django.http import HttpResponse
Expand Down Expand Up @@ -40,9 +41,7 @@ def create(self, title, description, type, owner, files=None, clone_files=False,
raise ValueError("File(s) expected")

if clone_files:
prefix = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
files = _asset_storage_manager.copy_files_list(files, dir=settings.ASSETS_ROOT, dir_prefix=prefix)
# TODO: please note the copy_files_list will make flat any directory structure
files = self._copy_data(files)

asset = LocalAsset(
title=title,
Expand All @@ -60,38 +59,61 @@ def remove_data(self, asset: LocalAsset):
Removes the files related to an Asset.
Only files within the Assets directory are removed
"""
removed_dir = set()
for file in asset.location:
is_managed = self._is_file_managed(file)
if is_managed:
logger.info(f"Removing asset file {file}")
_asset_storage_manager.delete(file)
removed_dir.add(os.path.dirname(file))
else:
logger.info(f"Not removing asset file outside asset directory {file}")

# TODO: in case of subdirs, make sure that all the tree is removed in the proper order
for dir in removed_dir:
if not os.path.exists(dir):
logger.warning(f"Trying to remove not existing asset directory {dir}")
continue
if not os.listdir(dir):
logger.info(f"Removing empty asset directory {dir}")
os.rmdir(dir)
if self._are_files_managed(asset):
logger.info(f"Removing files for asset {asset.pk}")
base = self._get_managed_dir(asset)
logger.info(f"Removing asset path {base} for asset {asset.pk}")
shutil.rmtree(base)
else:
logger.info(f"Not removing unmanaged files for asset {asset.pk}")

def replace_data(self, asset: LocalAsset, files: list):
    """Replace the data of ``asset`` with ``files``.

    The current data is first discarded through ``remove_data``; then the
    given list becomes the asset location and the asset is saved.

    :param asset: the asset to be updated
    :param files: the new list of paths to be stored as the asset location
    """
    # The old data must be removed while the asset still references it.
    self.remove_data(asset)

    asset.location = files
    asset.save()

def _copy_data(self, files):
    """Copy the given paths into a new asset directory.

    Directories are copied recursively, keeping their base name; regular files
    are copied along with their metadata; any other path is skipped with a
    warning.

    :param files: source paths (files and/or directories) to be copied
    :return: list of the paths of the copies, in the same order as the copied sources
    """
    # NOTE(review): _create_asset_dir is expected to return an existing directory - confirm
    new_path = self._create_asset_dir()
    logger.info(f"Copying asset data from {files} into {new_path}")
    new_files = []
    for file in files:
        if os.path.isdir(file):
            # normpath drops a trailing separator, which would otherwise make
            # basename() empty and turn the destination into new_path itself
            dst = os.path.join(new_path, os.path.basename(os.path.normpath(file)))
            logger.info(f"Copying into {dst} directory {file}")
            new_files.append(shutil.copytree(file, dst))
        elif os.path.isfile(file):
            logger.info(f"Copying into {new_path} file {os.path.basename(file)}")
            new_files.append(shutil.copy2(file, new_path))
        else:
            logger.warning(f"Not copying path {file}")

    return new_files

def _clone_data(self, source_dir):
    """Copy the whole tree under ``source_dir`` into a new asset directory.

    :param source_dir: directory whose content is duplicated
    :return: path of the new asset directory
    """
    new_path = self._create_asset_dir()
    logger.info(f"Cloning asset data from {source_dir} into {new_path}")

    shutil.copytree(source_dir, new_path, dirs_exist_ok=True)

    # copytree() ends by copying the permission bits of the source directory onto
    # the destination (copystat), so the configured mode has to be applied after
    # the copy, otherwise it would be overwritten.
    if settings.FILE_UPLOAD_DIRECTORY_PERMISSIONS is not None:
        # value is always set by default as None
        # https://docs.djangoproject.com/en/3.2/ref/settings/#file-upload-directory-permissions
        os.chmod(new_path, settings.FILE_UPLOAD_DIRECTORY_PERMISSIONS)

    return new_path

def clone(self, source: LocalAsset) -> LocalAsset:
# get a new asset instance to be edited and stored back
asset = LocalAsset.objects.get(pk=source.pk)

# only copy files if they are managed
if self._are_files_managed(asset.location):
asset.location = _asset_storage_manager.copy_files_list(
asset.location, dir=settings.ASSETS_ROOT, dir_prefix=datetime.datetime.now().strftime("%Y%m%d%H%M%S")
)
if self._are_files_managed(asset):
base = self._get_managed_dir(asset)
cloned = self._clone_data(base)
asset.location = [os.path.normpath(file).replace(base, cloned) for file in asset.location]

# it's a polymorphic object, we need to null both IDs
# https://django-polymorphic.readthedocs.io/en/stable/advanced.html#copying-polymorphic-objects
asset.pk = None
Expand All @@ -104,56 +126,112 @@ def create_download_url(self, asset) -> str:
return build_absolute_uri(reverse("assets-download", args=(asset.pk,)))

def create_link_url(self, asset) -> str:
return build_absolute_uri(reverse("assets-link", args=(asset.pk,)))
return build_absolute_uri(reverse("assets-link", args=(asset.pk,))) + f"/{os.path.basename(asset.location[0])}"

def _is_file_managed(self, file) -> bool:
@classmethod
def _is_file_managed(cls, file) -> bool:
assets_root = os.path.normpath(settings.ASSETS_ROOT)
return file.startswith(assets_root)

def _are_files_managed(self, files: list) -> bool:
@classmethod
def _are_files_managed(cls, asset: LocalAsset) -> bool:
"""
:param files: files to be checked
:return: True if all files are managed, False is no file is managed
:raise: ValueError if both managed and unmanaged files are in the list
"""
managed = unmanaged = None
for file in files:
if self._is_file_managed(file):
for file in asset.location:
if cls._is_file_managed(file):
managed = True
else:
unmanaged = True
if managed and unmanaged:
logger.error(f"Both managed and unmanaged files are present: {files}")
logger.error(f"Both managed and unmanaged files are present on Asset {asset.pk}: {asset.location}")
raise ValueError("Both managed and unmanaged files are present")

return bool(managed)

@classmethod
def _get_managed_dir(cls, asset):
    """Return the directory, below the assets root, that holds all the files of ``asset``.

    :param asset: asset whose ``location`` paths are inspected
    :return: normalized path of the parent directory shared by all the locations
    :raise ValueError: if the asset has no location, if any location is unmanaged,
        if the locations do not share the same parent directory, if that
        directory does not exist, or if it is the assets root itself
    """
    if not asset.location:
        raise ValueError("Asset does not have any associated file")

    assets_root = os.path.normpath(settings.ASSETS_ROOT)
    base_common = None

    for file in asset.location:
        if not cls._is_file_managed(file):
            raise ValueError("Asset is unmanaged")

        norm_file = os.path.normpath(file)
        relative = norm_file.removeprefix(assets_root)
        base = os.path.split(relative)[0].lstrip("/")

        # Compare against None: an empty base (file stored directly in the assets
        # root) is a legit value to be matched, not a "not yet set" marker.
        if base_common is None:
            base_common = base
        elif base_common != base:
            raise ValueError(f"Mismatching base dir in asset files - Asset {asset.pk}")

    # join() with an empty base returns the root plus a trailing separator:
    # normalize, so that the comparison with the assets root below is reliable.
    managed_dir = os.path.normpath(os.path.join(assets_root, base_common))
    if not os.path.isdir(managed_dir):
        raise ValueError(f"Common dir '{managed_dir}' does not seem to be a directory - Asset {asset.pk}")

    # Safety net: the returned dir may be removed as a whole by the callers,
    # so it must never be the assets root itself.
    if assets_root == managed_dir:
        raise ValueError(f"Common dir '{managed_dir}' matches the whole Assets dir - Asset {asset.pk}")

    return managed_dir


class LocalAssetDownloadHandler(AssetDownloadHandlerInterface):

def create_response(self, asset: LocalAsset, attachment: bool = False, basename=None) -> HttpResponse:
def create_response(
self, asset: LocalAsset, attachment: bool = False, basename: str = None, path: str = None
) -> HttpResponse:
if not asset.location:
return HttpResponse("Asset does not contain any data", status=500)

if len(asset.location) > 1:
logger.warning("TODO: Asset contains more than one file. Download needs to be implemented")

file0 = asset.location[0]
filename = os.path.basename(file0)
orig_base, ext = os.path.splitext(filename)
outname = f"{basename or orig_base}{ext}"
if not path: # use the file definition
if not os.path.isfile(file0):
logger.warning(f"Default file {file0} not found for asset {asset.id}")
return HttpResponse(f"Default file not found for asset {asset.id}", status=400)
localfile = file0

else: # a specific file is requested
if "/../" in path: # we may want to improve fraudolent request detection
logger.warning(f"Tentative path traversal for asset {asset.id}")
return HttpResponse(f"File not found for asset {asset.id}", status=400)

if os.path.isfile(file0):
dir0 = os.path.dirname(file0)
elif os.path.isdir(file0):
dir0 = file0
else:
return HttpResponse(f"Unexpected internal location '{file0}' for asset {asset.id}", status=500)

localfile = os.path.join(dir0, path)
logger.debug(f"Requested path {dir0} + {path}")

if os.path.isfile(localfile):
filename = os.path.basename(localfile)
orig_base, ext = os.path.splitext(filename)
outname = f"{basename or orig_base or 'file'}{ext}"

if _asset_storage_manager.exists(file0):
logger.info(f"Returning file {file0} with name {outname}")
logger.info(f"Returning file '{localfile}' with name '{outname}'")

return DownloadResponse(
_asset_storage_manager.open(file0).file,
_asset_storage_manager.open(localfile).file,
basename=f"{outname}",
attachment=attachment,
)
else:
logger.warning(f"Internal file {file0} not found for asset {asset.id}")
return HttpResponse(f"Internal file not found for asset {asset.id}", status=500)
logger.warning(f"Internal file {localfile} not found for asset {asset.id}")
return HttpResponse(f"Internal file not found for asset {asset.id}", status=404 if path else 500)


asset_handler_registry.register(LocalAssetHandler)
2 changes: 1 addition & 1 deletion geonode/assets/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Meta:
verbose_name_plural = "Local assets"

def __str__(self) -> str:
return super().__str__()
return f"{self.__class__.__name__}: {self.type}|{self.title}"


def cleanup_asset_data(instance, *args, **kwargs):
Expand Down
Loading

0 comments on commit 618a936

Please sign in to comment.