Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fixes #12226] Directory assets #12337

Merged
merged 13 commits into from
Jun 18, 2024
55 changes: 55 additions & 0 deletions geonode/assets/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import json
import logging
from django.db import models
from django.forms import widgets
from django.contrib import admin

from geonode.assets.local import LocalAssetHandler
from geonode.assets.models import LocalAsset
from geonode.base.models import Link

logger = logging.getLogger(__name__)


class PrettyJSONWidget(widgets.Textarea):
    """Textarea that shows its JSON content pretty-printed and sized to fit it."""

    def format_value(self, value):
        """Return ``value`` re-serialized as indented JSON with sorted keys.

        As a side effect the widget's ``rows``/``cols`` attributes are set from the
        formatted text. If anything goes wrong the error is logged and the stock
        ``Textarea`` formatting is returned instead.
        """
        try:
            value = json.dumps(json.loads(value), indent=2, sort_keys=True)
            # fit the textarea to the content: rows clamped to 10..30, cols to 40..120
            text_rows = value.split("\n")
            widest_row = max(len(row) for row in text_rows)
            self.attrs["rows"] = min(max(len(text_rows) + 2, 10), 30)
            self.attrs["cols"] = min(max(widest_row + 2, 40), 120)
            return value
        except Exception as e:
            logger.warning("Error while formatting JSON: {}".format(e))
            return super(PrettyJSONWidget, self).format_value(value)


@admin.register(LocalAsset)
class LocalAssetAdmin(admin.ModelAdmin):
    """Admin configuration for ``LocalAsset``.

    Besides the model fields, the change list shows a few computed columns:
    the formatted creation time, whether the first file is managed, the number
    of links pointing at the asset and a summary of the first such link.
    """

    model = LocalAsset

    list_display = ("id", "title", "type", "owner", "created_formatted", "managed", "links", "link0")
    list_display_links = ("id", "title")

    # render every JSON field with the pretty-printing textarea
    formfield_overrides = {models.JSONField: {"widget": PrettyJSONWidget}}

    def created_formatted(self, obj):
        """Return the creation timestamp formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return obj.created.strftime("%Y-%m-%d %H:%M:%S")

    def links(self, obj):
        """Return how many ``Link`` rows reference this asset."""
        return Link.objects.filter(asset=obj).count()

    def link0(self, obj):
        """Return a one-line summary of the first link of the asset, or None if it has none."""
        link = Link.objects.filter(asset=obj).first()
        return f"{link.link_type} {link.extension}: {link.name}" if link else None

    def managed(self, obj) -> "bool | None":
        """Tell whether the first file of the asset is a managed one.

        :return: the result of ``LocalAssetHandler._is_file_managed`` on the first
                 location entry, or None when that entry cannot be evaluated
                 (the error is logged)
        """
        try:
            return LocalAssetHandler._is_file_managed(obj.location[0])
        except Exception as e:
            # best effort: a broken asset must not break the whole change list
            logger.error(f"Bad location for asset obj: {e}")
            return None

    # show the column as a boolean icon
    managed.boolean = True
14 changes: 7 additions & 7 deletions geonode/assets/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
logger = logging.getLogger(__name__)


class AssetDownloadHandlerInterface:
    """Base interface for objects that build the download response of an Asset."""

    def create_response(self, asset: Asset, attachment: bool = False, basename=None, path=None) -> HttpResponse:
        """Build the HTTP response for ``asset``.

        This base implementation is only a placeholder to be overridden.

        :raise NotImplementedError: always
        """
        raise NotImplementedError


class AssetHandlerInterface:

def handled_asset_class(self):
Expand All @@ -32,19 +38,13 @@ def clone(self, asset: Asset) -> Asset:
def create_link_url(self, asset: Asset) -> str:
raise NotImplementedError()

def get_download_handler(self, asset: Asset):
def get_download_handler(self, asset: Asset) -> AssetDownloadHandlerInterface:
raise NotImplementedError()

def get_storage_manager(self, asset):
raise NotImplementedError()


class AssetDownloadHandlerInterface:

def create_response(self, asset: Asset, attachment: bool = False, basename=None) -> HttpResponse:
raise NotImplementedError()


class AssetHandlerRegistry:
_registry = {}
_default_handler = None
Expand Down
158 changes: 118 additions & 40 deletions geonode/assets/local.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import logging
import os
import shutil

from django.conf import settings
from django.http import HttpResponse
Expand Down Expand Up @@ -40,9 +41,7 @@ def create(self, title, description, type, owner, files=None, clone_files=False,
raise ValueError("File(s) expected")

if clone_files:
prefix = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
files = _asset_storage_manager.copy_files_list(files, dir=settings.ASSETS_ROOT, dir_prefix=prefix)
# TODO: please note the copy_files_list will make flat any directory structure
files = self._copy_data(files)

asset = LocalAsset(
title=title,
Expand All @@ -60,38 +59,61 @@ def remove_data(self, asset: LocalAsset):
Removes the files related to an Asset.
Only files within the Assets directory are removed
"""
removed_dir = set()
for file in asset.location:
is_managed = self._is_file_managed(file)
if is_managed:
logger.info(f"Removing asset file {file}")
_asset_storage_manager.delete(file)
removed_dir.add(os.path.dirname(file))
else:
logger.info(f"Not removing asset file outside asset directory {file}")

# TODO: in case of subdirs, make sure that all the tree is removed in the proper order
for dir in removed_dir:
if not os.path.exists(dir):
logger.warning(f"Trying to remove not existing asset directory {dir}")
continue
if not os.listdir(dir):
logger.info(f"Removing empty asset directory {dir}")
os.rmdir(dir)
if self._are_files_managed(asset):
logger.info(f"Removing files for asset {asset.pk}")
base = self._get_managed_dir(asset)
logger.info(f"Removing asset path {base} for asset {asset.pk}")
shutil.rmtree(base)
else:
logger.info(f"Not removing unmanaged files for asset {asset.pk}")

def replace_data(self, asset: LocalAsset, files: list):
    """Replace the data of ``asset`` with ``files`` and save the asset.

    The current data is first dropped through ``remove_data``. The given paths
    are stored as they are in ``asset.location``: nothing is copied here.
    """
    self.remove_data(asset)
    asset.location = files
    asset.save()

def _copy_data(self, files):
    """Copy the given paths into a newly created asset directory.

    Directories are copied recursively under their own base name, regular files
    are copied with ``shutil.copy2``; any other path is skipped with a warning.

    :param files: paths of the files and/or directories to be copied
    :return: list with the paths of the copies, in the order they were made
    """
    new_path = self._create_asset_dir()
    logger.info(f"Copying asset data from {files} into {new_path}")
    new_files = []
    for file in files:
        if os.path.isdir(file):
            # normpath drops a trailing separator: without it basename() would be empty
            # and copytree() would target the already existing asset dir and fail
            dst = os.path.join(new_path, os.path.basename(os.path.normpath(file)))
            logger.info(f"Copying into {dst} directory {file}")
            new_dir = shutil.copytree(file, dst)
            new_files.append(new_dir)
        elif os.path.isfile(file):
            logger.info(f"Copying into {new_path} file {os.path.basename(file)}")
            new_file = shutil.copy2(file, new_path)
            new_files.append(new_file)
        else:
            logger.warning(f"Not copying path {file}")

    return new_files

def _clone_data(self, source_dir):
    """Copy the whole tree under ``source_dir`` into a newly created asset directory.

    :param source_dir: directory to be cloned
    :return: path of the new directory containing the copy
    """
    new_path = self._create_asset_dir()
    logger.info(f"Cloning asset data from {source_dir} into {new_path}")

    shutil.copytree(source_dir, new_path, dirs_exist_ok=True)

    # Set the configured permissions only once the copy is done: copytree() ends by
    # copying the permission bits of the source directory onto the destination
    # (copystat), which would undo a chmod() performed beforehand.
    if settings.FILE_UPLOAD_DIRECTORY_PERMISSIONS is not None:
        # value is always set by default as None
        # https://docs.djangoproject.com/en/3.2/ref/settings/#file-upload-directory-permissions
        os.chmod(new_path, settings.FILE_UPLOAD_DIRECTORY_PERMISSIONS)

    return new_path

def clone(self, source: LocalAsset) -> LocalAsset:
# get a new asset instance to be edited and stored back
asset = LocalAsset.objects.get(pk=source.pk)

# only copy files if they are managed
if self._are_files_managed(asset.location):
asset.location = _asset_storage_manager.copy_files_list(
asset.location, dir=settings.ASSETS_ROOT, dir_prefix=datetime.datetime.now().strftime("%Y%m%d%H%M%S")
)
if self._are_files_managed(asset):
base = self._get_managed_dir(asset)
cloned = self._clone_data(base)
asset.location = [os.path.normpath(file).replace(base, cloned) for file in asset.location]

# it's a polymorphic object, we need to null both IDs
# https://django-polymorphic.readthedocs.io/en/stable/advanced.html#copying-polymorphic-objects
asset.pk = None
Expand All @@ -104,56 +126,112 @@ def create_download_url(self, asset) -> str:
return build_absolute_uri(reverse("assets-download", args=(asset.pk,)))

def create_link_url(self, asset) -> str:
    """Return the absolute URL of the asset link.

    The base name of the first file of the asset is appended to the URL as a
    trailing path segment. An asset without any location gets the plain link URL
    instead of failing on the missing first entry.
    """
    url = build_absolute_uri(reverse("assets-link", args=(asset.pk,)))
    if not asset.location:
        return url
    return url + f"/{os.path.basename(asset.location[0])}"

@classmethod
def _is_file_managed(cls, file) -> bool:
    """Tell whether ``file`` lies inside the assets directory (``settings.ASSETS_ROOT``).

    Both paths are normalized and the match is done on whole path components, so
    that neither a sibling directory sharing the same name prefix (``<root>_old``)
    nor a path escaping the root through ``..`` is reported as managed.
    """
    assets_root = os.path.normpath(settings.ASSETS_ROOT)
    norm_file = os.path.normpath(file)
    # join(root, "") yields the root followed by exactly one separator
    return norm_file == assets_root or norm_file.startswith(os.path.join(assets_root, ""))

@classmethod
def _are_files_managed(cls, asset: LocalAsset) -> bool:
    """
    :param asset: asset whose files (``asset.location``) are to be checked
    :return: True if all files are managed, False if no file is managed
             (or the asset has no files at all)
    :raise: ValueError if both managed and unmanaged files are in the list
    """
    managed = unmanaged = None
    # a missing location is handled like an empty one
    for file in asset.location or []:
        if cls._is_file_managed(file):
            managed = True
        else:
            unmanaged = True
        if managed and unmanaged:
            logger.error(f"Both managed and unmanaged files are present on Asset {asset.pk}: {asset.location}")
            raise ValueError("Both managed and unmanaged files are present")

    return bool(managed)

@classmethod
def _get_managed_dir(cls, asset):
    """Return the directory, inside the assets root, that holds all the files of ``asset``.

    Every entry of ``asset.location`` must be managed and must share the same
    parent directory.

    :param asset: the asset whose files are inspected
    :return: normalized path of the common parent directory
    :raise ValueError: if the asset has no files, a file is unmanaged, the files
                       do not share the same parent, the parent is not an existing
                       directory, or the parent is the assets root itself
    """
    if not asset.location:
        raise ValueError("Asset does not have any associated file")

    assets_root = os.path.normpath(settings.ASSETS_ROOT)
    base_common = None

    for file in asset.location:
        if not cls._is_file_managed(file):
            raise ValueError("Asset is unmanaged")

        norm_file = os.path.normpath(file)
        relative = norm_file.removeprefix(assets_root)
        base = os.path.split(relative)[0].lstrip("/")

        # compare against None: an empty base (file placed directly in the assets
        # root) is a legitimate value that must take part in the mismatch check
        if base_common is None:
            base_common = base
        elif base_common != base:
            raise ValueError(f"Mismatching base dir in asset files - Asset {asset.pk}")

    # normalize: join() with an empty base returns the root plus a trailing separator,
    # which would slip through the equality check against the assets root below
    managed_dir = os.path.normpath(os.path.join(assets_root, base_common))
    if not os.path.isdir(managed_dir):
        raise ValueError(f"Common dir '{managed_dir}' does not seem to be a directory - Asset {asset.pk}")

    if assets_root == managed_dir:  # never hand out the whole assets dir
        raise ValueError(f"Common dir '{managed_dir}' matches the whole Assets dir - Asset {asset.pk}")

    return managed_dir


class LocalAssetDownloadHandler(AssetDownloadHandlerInterface):
    """Download handler serving the files of a ``LocalAsset`` from the local storage."""

    def create_response(
        self, asset: LocalAsset, attachment: bool = False, basename: str = None, path: str = None
    ) -> HttpResponse:
        """Build the response that serves a file of ``asset``.

        :param asset: the asset to be served; only its first location entry is used
        :param attachment: forwarded to ``DownloadResponse``
        :param basename: name (without extension) for the served file; defaults to
                         the base name of the file on disk
        :param path: optional path of the requested file, relative to the directory
                     of the first location entry (or to the entry itself when it is
                     a directory); when empty the first location entry is served
        :return: a ``DownloadResponse`` on success, otherwise an ``HttpResponse``
                 with status 400, 404 or 500
        """
        if not asset.location:
            return HttpResponse("Asset does not contain any data", status=500)

        if len(asset.location) > 1:
            logger.warning("TODO: Asset contains more than one file. Download needs to be implemented")

        file0 = asset.location[0]
        if not path:  # use the file definition
            if not os.path.isfile(file0):
                logger.warning(f"Default file {file0} not found for asset {asset.id}")
                return HttpResponse(f"Default file not found for asset {asset.id}", status=400)
            localfile = file0

        else:  # a specific file is requested
            # Refuse absolute paths (os.path.join would let them replace the base dir)
            # and any ".." component, wherever it appears in the path.
            if os.path.isabs(path) or ".." in path.replace("\\", "/").split("/"):
                logger.warning(f"Tentative path traversal for asset {asset.id}")
                return HttpResponse(f"File not found for asset {asset.id}", status=400)

            if os.path.isfile(file0):
                dir0 = os.path.dirname(file0)
            elif os.path.isdir(file0):
                dir0 = file0
            else:
                return HttpResponse(f"Unexpected internal location '{file0}' for asset {asset.id}", status=500)

            localfile = os.path.normpath(os.path.join(dir0, path))
            logger.debug(f"Requested path {dir0} + {path}")

            # defence in depth: the resolved path must still be inside the base dir
            base_dir = os.path.normpath(dir0)
            if localfile != base_dir and not localfile.startswith(os.path.join(base_dir, "")):
                logger.warning(f"Tentative path traversal for asset {asset.id}")
                return HttpResponse(f"File not found for asset {asset.id}", status=400)

        if os.path.isfile(localfile):
            filename = os.path.basename(localfile)
            orig_base, ext = os.path.splitext(filename)
            outname = f"{basename or orig_base or 'file'}{ext}"

            logger.info(f"Returning file '{localfile}' with name '{outname}'")

            return DownloadResponse(
                _asset_storage_manager.open(localfile).file,
                basename=f"{outname}",
                attachment=attachment,
            )
        else:
            logger.warning(f"Internal file {localfile} not found for asset {asset.id}")
            # a missing requested path is a client error, a missing default file is ours
            return HttpResponse(f"Internal file not found for asset {asset.id}", status=404 if path else 500)


asset_handler_registry.register(LocalAssetHandler)
2 changes: 1 addition & 1 deletion geonode/assets/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Meta:
verbose_name_plural = "Local assets"

def __str__(self) -> str:
    """Return ``<class name>: <type>|<title>`` for this object."""
    return f"{self.__class__.__name__}: {self.type}|{self.title}"


def cleanup_asset_data(instance, *args, **kwargs):
Expand Down
Loading
Loading