Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add content retrieval logic #24

Merged
merged 19 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions jupyter_drives/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,17 @@ def initialize(self, logger: logging.Logger, manager: JupyterDrivesManager):
return super().initialize(logger, manager)

@tornado.web.authenticated
async def get(self, path: str = "", drive: str = ""):
async def get(self, drive: str = "", path: str = ""):
    """Respond with the contents stored at ``path`` inside ``drive``.

    The lookup is delegated to the manager's ``get_contents``; whatever it
    returns is sent back as the response body.
    """
    self.finish(await self._manager.get_contents(drive, path))

@tornado.web.authenticated
async def post(self, path: str = "", drive: str = ""):
async def post(self, drive: str = "", path: str = ""):
    """Create a new file or directory at ``path`` inside ``drive``.

    The work is delegated to the manager's ``new_file``; whatever it returns
    is sent back as the response body.
    """
    self.finish(await self._manager.new_file(drive, path))

@tornado.web.authenticated
async def patch(self, path: str = "", drive: str = ""):
async def patch(self, drive: str = "", path: str = ""):
    """Forward a rename request for ``path`` inside ``drive`` to the manager.

    The decoded JSON request body is expanded into keyword arguments of the
    manager's ``rename_file``; its result is sent back as the response body.
    """
    # NOTE(review): the body is expanded with ``**``, so it must be a JSON
    # object -- confirm how a missing/empty body should be handled.
    rename_options = self.get_json_body()
    self.finish(await self._manager.rename_file(drive, path, **rename_options))
Expand Down
64 changes: 60 additions & 4 deletions jupyter_drives/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
import logging
from typing import Dict, List, Optional, Tuple, Union, Any

import os
import tornado
import httpx
import traitlets
import base64
from jupyter_server.utils import url_path_join

import obstore as obs
from libcloud.storage.types import Provider
from libcloud.storage.providers import get_driver
import pyarrow

from .log import get_logger
from .base import DrivesConfig
Expand Down Expand Up @@ -86,7 +89,7 @@ async def list_drives(self):
"name": result.name,
"region": self._config.region_name if self._config.region_name is not None else "eu-north-1",
"creation_date": result.extra["creation_date"],
"mounted": "true" if result.name not in self._content_managers else "false",
"mounted": False if result.name not in self._content_managers else True,
"provider": self._config.provider
}
)
Expand Down Expand Up @@ -153,14 +156,67 @@ async def unmount_drive(self, drive_name: str):

return

async def get_contents(self, drive_name, path, **kwargs):
async def get_contents(self, drive_name, path):
    """Get contents of a file or directory.

    Args:
        drive_name: name of drive to get the contents of
        path: path to file or directory (empty string for root listing)

    Returns:
        A dict with a single ``"data"`` key. For a directory listing the value
        is a list of ``{"path", "last_modified", "size"}`` dicts, one per
        object found under ``path``. For a file the value is one dict with
        ``"path"``, ``"content"``, ``"last_modified"`` and ``"size"``, where
        ``"content"`` is base64 text for PDF/image extensions and UTF-8 text
        otherwise.

    Raises:
        tornado.web.HTTPError: 400 (Bad Request) when anything goes wrong
            while retrieving the contents (unknown drive, storage error,
            content that cannot be decoded as UTF-8, ...).
    """
    # For these media types the raw bytes are sent base64-encoded so that
    # they are viewable in JupyterLab.
    base64_extensions = frozenset(
        {
            ".pdf",
            ".svg",
            ".tif",
            ".tiff",
            ".jpg",
            ".jpeg",
            ".gif",
            ".png",
            ".bmp",
            ".webp",
        }
    )

    logging.getLogger(__name__).debug(
        "Retrieving contents of %r from drive %r", path, drive_name
    )

    # the root of a drive is addressed with an empty prefix
    if path == "/":
        path = ""

    try:
        # looked up inside the ``try`` so that an unknown drive is reported
        # as a 400 like every other retrieval failure
        store = self._content_managers[drive_name]
        current_object = os.path.basename(path)

        # NOTE: heuristic -- a last path segment without a dot is treated as
        # a directory, anything else as a file.
        if "." not in current_object:
            data = []
            # using Arrow lists as they are recommended for large results;
            # the stream is an async iterable of record batches
            stream = obs.list(store, path, chunk_size=100, return_arrow=True)
            async for batch in stream:
                data.extend(
                    {
                        "path": entry["path"],
                        "last_modified": entry["last_modified"].isoformat(),
                        "size": entry["size"],
                    }
                    for entry in pyarrow.record_batch(batch).to_pylist()
                )
        else:
            # retrieve contents of object in 5MB sized chunks; the chunks
            # are joined once at the end instead of repeatedly
            # concatenating an ever-growing bytes object
            obj = await obs.get_async(store, path)
            chunks = []
            async for buf in obj.stream(min_chunk_size=5 * 1024 * 1024):
                chunks.append(buf)
            content = b"".join(chunks)

            # retrieve metadata of object
            metadata = await obs.head_async(store, path)

            # compare the extension case-insensitively so that e.g. ".PNG"
            # is handled like ".png"
            ext = os.path.splitext(path)[1].lower()
            if ext in base64_extensions:
                processed_content = base64.b64encode(content).decode("utf-8")
            else:
                processed_content = content.decode("utf-8")

            data = {
                "path": path,
                "content": processed_content,
                "last_modified": metadata["last_modified"].isoformat(),
                "size": metadata["size"],
            }
    except Exception as e:
        # chain the original exception so the root cause stays in the traceback
        raise tornado.web.HTTPError(
            status_code=httpx.codes.BAD_REQUEST,
            reason=f"The following error occured when retrieving the contents: {e}",
        ) from e

    return {"data": data}

async def new_file(self, drive_name, path, **kwargs):
"""Create a new file or directory at the given path.
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ classifiers = [
]
dependencies = [
"obstore>=0.2.0,<0.3",
"pyarrow>=18.0.0,<19.0.0",
"jupyter_server>=2.14.2,<3",
"s3contents>=0.11.1,<0.12.0",
"apache-libcloud>=3.8.0, <4",
Expand Down
108 changes: 79 additions & 29 deletions src/contents.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
// Copyright (c) Jupyter Development Team.
// Distributed under the terms of the Modified BSD License.

import { JupyterFrontEnd } from '@jupyterlab/application';
import { Signal, ISignal } from '@lumino/signaling';
import { Contents, ServerConnection } from '@jupyterlab/services';
import { PathExt } from '@jupyterlab/coreutils';
import { IDriveInfo } from './token';
import { mountDrive } from './requests';
import { IDriveInfo, IRegisteredFileTypes } from './token';
import { getContents, mountDrive } from './requests';

let data: Contents.IModel = {
name: '',
Expand Down Expand Up @@ -120,6 +117,20 @@ export class Drive implements Contents.IDrive {
return this._serverSettings;
}

/**
 * The registered file types known to this drive.
 *
 * Returns the internal map as-is (not a copy). Entries are keyed by file
 * extension and are filled in by `getRegisteredFileTypes`.
 */
get registeredFileTypes(): IRegisteredFileTypes {
  return this._registeredFileTypes;
}

/**
 * Replace the registered file types known to this drive.
 *
 * The given map is stored directly; previously stored entries are discarded.
 */
set registeredFileTypes(fileTypes: IRegisteredFileTypes) {
  this._registeredFileTypes = fileTypes;
}

/**
* A signal emitted when a file operation takes place.
*/
Expand Down Expand Up @@ -185,40 +196,41 @@ export class Drive implements Contents.IDrive {
): Promise<Contents.IModel> {
let relativePath = '';
if (localPath !== '') {
if (localPath.includes(this.name)) {
relativePath = localPath.split(this.name + '/')[1];
} else {
relativePath = localPath;
}

// extract current drive name
const currentDrive = this.drivesList.filter(x => x.name === localPath)[0];
const currentDrive = this._drivesList.filter(
x =>
x.name ===
(localPath.indexOf('/') !== -1
? localPath.substring(0, localPath.indexOf('/'))
: localPath)
)[0];

// when accessed the first time, mount drive
if (!currentDrive.mounted) {
if (currentDrive.mounted === false) {
try {
await mountDrive(localPath, {
provider: currentDrive.provider,
region: currentDrive.region
});
currentDrive.mounted = true;
this._drivesList.filter(x => x.name === localPath)[0].mounted = true;
} catch (e) {
console.log(e);
}
}

data = {
name: PathExt.basename(localPath),
path: PathExt.basename(localPath),
last_modified: '',
created: '',
content: [],
format: 'json',
mimetype: '',
size: undefined,
writable: true,
type: 'directory'
};
// eliminate drive name from path
relativePath =
localPath.indexOf('/') !== -1
? localPath.substring(localPath.indexOf('/') + 1)
: '';

data = await getContents(currentDrive.name, {
path: relativePath,
registeredFileTypes: this._registeredFileTypes
});
} else {
// retrieving list of contents from root
// in our case: list available drives
const drivesList: Contents.IModel[] = [];
for (const drive of this._drivesList) {
drivesList.push({
Expand Down Expand Up @@ -248,7 +260,6 @@ export class Drive implements Contents.IDrive {
type: 'directory'
};
}
console.log('GET: ', relativePath);

Contents.validateContentsModel(data);
return data;
Expand Down Expand Up @@ -558,7 +569,11 @@ export class Drive implements Contents.IDrive {
* checkpoint is created.
*/
createCheckpoint(path: string): Promise<Contents.ICheckpointModel> {
return Promise.reject('Repository is read only');
const emptyCheckpoint: Contents.ICheckpointModel = {
id: '',
last_modified: ''
};
return Promise.resolve(emptyCheckpoint);
}

/**
Expand Down Expand Up @@ -599,6 +614,40 @@ export class Drive implements Contents.IDrive {
return Promise.reject('Read only');
}

/**
 * Collect the file types registered in the application's document registry
 * and index them by file extension (e.g.: .txt, .pdf, .jpeg).
 *
 * For every extension the file type name, its mimetypes (e.g.: text/plain,
 * application/pdf) and its file format (e.g.: base64, text) are stored. An
 * extension that already has an entry is left untouched. A file type without
 * any extension is recorded under the empty-string key as a directory.
 *
 * @param app - the application whose `docRegistry` is queried.
 */
getRegisteredFileTypes(app: JupyterFrontEnd) {
  const knownTypes = this._registeredFileTypes;

  for (const fileType of app.docRegistry.fileTypes()) {
    const extensions = fileType.extensions;

    // a file type without extensions is treated as a directory
    if (extensions.length === 0) {
      knownTypes[''] = {
        fileType: 'directory',
        fileFormat: 'json',
        fileMimeTypes: ['text/directory']
      };
    }

    // first registration wins: keep the mimetypes and file format of the
    // first file type seen for each extension
    for (const extension of extensions) {
      if (knownTypes[extension]) {
        continue;
      }
      knownTypes[extension] = {
        fileType: fileType.name,
        fileMimeTypes: [...fileType.mimeTypes],
        fileFormat: fileType.fileFormat ? fileType.fileFormat : ''
      };
    }
  }
}

/**
* Get a REST url for a file given a path.
*/
Expand All @@ -619,6 +668,7 @@ export class Drive implements Contents.IDrive {
private _fileChanged = new Signal<this, Contents.IChangedArgs>(this);
private _isDisposed: boolean = false;
private _disposed = new Signal<this, void>(this);
private _registeredFileTypes: IRegisteredFileTypes = {};
}

export namespace Drive {
Expand Down
7 changes: 5 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ const drivesListProvider: JupyterFrontEndPlugin<IDriveInfo[]> = {
mounted: drive.mounted
});
}
} catch {
console.log('Failed loading available drives list.');
} catch (error) {
console.log('Failed loading available drives list, with error: ', error);
}
return drives;
}
Expand Down Expand Up @@ -224,6 +224,9 @@ const driveFileBrowser: JupyterFrontEndPlugin<void> = {

app.serviceManager.contents.addDrive(drive);

// get registered file types
drive.getRegisteredFileTypes(app);

// Manually restore and load the drive file browser.
const driveBrowser = fileBrowserFactory.createFileBrowser('drivebrowser', {
auto: false,
Expand Down
Loading
Loading