Skip to content

Commit

Permalink
Overwrite check_details in Arweave class.
Browse files Browse the repository at this point in the history
  • Loading branch information
mariacarmina committed Nov 16, 2023
1 parent eff94e1 commit dc65553
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 6 deletions.
6 changes: 1 addition & 5 deletions ocean_provider/file_types/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import mimetypes
import os
import re
from abc import abstractmethod
from cgi import parse_header
from typing import Protocol, Tuple
Expand Down Expand Up @@ -72,10 +71,7 @@ def check_details(self, with_checksum=False):
if result:
status_code = result.status_code
headers = copy.deepcopy(result.headers)
if "arweave" in result.url:
files_url = ""
else:
files_url = copy.deepcopy(result.url)
files_url = copy.deepcopy(result.url)
# always close requests session, see https://requests.readthedocs.io/en/latest/user/advanced/#body-content-workflow
result.close()
if status_code == 200:
Expand Down
72 changes: 71 additions & 1 deletion ocean_provider/file_types/file_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
import logging
import os
import re
import copy
from typing import Any, Optional, Tuple
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse
from uuid import uuid4

from enforce_typing import enforce_types
from ocean_provider.file_types.definitions import EndUrlType, FilesType
import requests
from ocean_provider.utils.url import is_safe_url

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -118,6 +121,73 @@ def get_download_url(self):
def get_filename(self):
return uuid4().hex

def check_details(self, with_checksum=False):
"""
If the url argument is invalid, returns False and empty dictionary.
Otherwise it returns True and a dictionary containing contentType and
contentLength. File name remains empty.
"""

url = self.get_download_url()

try:
if not is_safe_url(url):
return False, {}
status_code = None
headers = None
files_url = None
for _ in range(int(os.getenv("REQUEST_RETRIES", 1))):
result, extra_data = self._get_result_from_url(
with_checksum=with_checksum,
)
if result:
status_code = result.status_code
headers = copy.deepcopy(result.headers)
files_url = ""
# always close requests session, see https://requests.readthedocs.io/en/latest/user/advanced/#body-content-workflow
result.close()
if status_code == 200:
break

if status_code == 200:
content_type = headers.get("Content-Type")
content_length = headers.get("Content-Length")
content_range = headers.get("Content-Range")
file_name = None

if files_url:
file_name = urlparse(files_url).path.split("/")[-1]

if not content_length and content_range:
# sometimes servers send content-range instead
try:
content_length = content_range.split("-")[1]
except IndexError:
pass

if content_type:
try:
content_type = content_type.split(";")[0]
except IndexError:
pass

if content_type or content_length or file_name:
details = {
"contentLength": content_length or "",
"contentType": content_type or "",
"filename": file_name or "",
}

if extra_data:
details.update(extra_data)

self.checked_details = details
return True, details
except requests.exceptions.RequestException:
pass

return False, {}


class GraphqlQuery(EndUrlType, FilesType):
@enforce_types
Expand Down

0 comments on commit dc65553

Please sign in to comment.