feat: add cache max size setting to ResourceService #628

Merged: 2 commits, Dec 26, 2024 (showing changes from 1 commit)
README.md (3 changes: 2 additions & 1 deletion)
@@ -77,7 +77,8 @@ Priority order:
| storage.maxUploadedFileSize | 536870912 | No | Maximum size in bytes of an uploaded file. If the size of an uploaded file exceeds the limit, the server returns HTTP code 413
| encryption.secret | - | No | Secret used for AES encryption of a prefix to the bucket blob storage. The value should be a randomly generated string.
| encryption.key | - | No | Key used for AES encryption of a prefix to the bucket blob storage. The value should be a randomly generated string.
| resources.maxSize | 1048576 | No | Max allowed size in bytes for a resource.
| resources.maxSize | 67108864 | No | Max allowed size in bytes for a resource.
| resources.cacheMaxSize | 1048576 | No | Max size in bytes for a resource to cache in Redis.
| resources.syncPeriod | 60000 | No | Period in milliseconds that defines how frequently to check for resources to sync.
| resources.syncDelay | 120000 | No | Delay in milliseconds before a modified resource is written back to object storage.
| resources.syncBatch | 4096 | No | How many resources to sync in one go.
@@ -65,9 +65,6 @@ public class Proxy implements Handler<HttpServerRequest> {
public static final String HEADER_UPSTREAM_EXTRA_DATA = "X-UPSTREAM-EXTRA-DATA";
public static final String HEADER_UPSTREAM_ATTEMPTS = "X-UPSTREAM-ATTEMPTS";
public static final String HEADER_CONTENT_TYPE_APPLICATION_JSON = "application/json";

public static final int REQUEST_BODY_MAX_SIZE_BYTES = 16 * 1024 * 1024;

private static final Set<HttpMethod> ALLOWED_HTTP_METHODS = Set.of(HttpMethod.GET, HttpMethod.POST, HttpMethod.PUT, HttpMethod.DELETE, HttpMethod.HEAD);

private final Vertx vertx;
@@ -152,7 +149,7 @@ private void handleRequest(HttpServerRequest request) {
}
} else {
// Not the only case: Content-Length can also be missing when Transfer-Encoding: chunked is used
if (contentLength > REQUEST_BODY_MAX_SIZE_BYTES) {
if (contentLength > resourceService.getMaxSize()) {
respond(request, HttpStatus.REQUEST_ENTITY_TOO_LARGE, "Request body is too large");
return;
}
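These two hunks drop the hard-coded 16 MiB `REQUEST_BODY_MAX_SIZE_BYTES` constant and bound the declared request body by the configurable `resources.maxSize` instead, obtained via `ResourceService.getMaxSize()`. A minimal sketch of the resulting guard; `respond`, `HttpStatus.REQUEST_ENTITY_TOO_LARGE` and `getMaxSize()` come from the code above, while the helper below is purely illustrative:

```java
// Illustrative sketch, not the project's exact code: the declared body size is now
// compared against the configurable resources.maxSize instead of a fixed 16 MiB.
static boolean exceedsLimit(long contentLength, long maxSize) {
    // A negative value stands for a missing Content-Length header
    // (e.g. Transfer-Encoding: chunked); such requests are not rejected by this check.
    return contentLength >= 0 && contentLength > maxSize;
}

// Usage inside handleRequest, mirroring the hunk above:
// if (exceedsLimit(contentLength, resourceService.getMaxSize())) {
//     respond(request, HttpStatus.REQUEST_ENTITY_TOO_LARGE, "Request body is too large");
//     return;
// }
```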
server/src/main/resources/aidial.settings.json (3 changes: 2 additions & 1 deletion)
@@ -52,7 +52,8 @@
}
},
"resources": {
"maxSize" : 1048576,
"maxSize" : 67108864,
"cacheMaxSize": 1048576,
"syncPeriod": 60000,
"syncDelay": 120000,
"syncBatch": 4096,
@@ -12,6 +12,7 @@
import com.epam.aidial.core.server.security.ExtractedClaims;
import com.epam.aidial.core.storage.blobstore.BlobStorage;
import com.epam.aidial.core.storage.http.HttpException;
import com.epam.aidial.core.storage.service.ResourceService;
import io.vertx.core.Future;
import io.vertx.core.MultiMap;
import io.vertx.core.Vertx;
@@ -29,6 +30,8 @@
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;

import java.util.LinkedHashMap;
import java.util.List;
@@ -54,6 +57,7 @@
import static org.mockito.Mockito.when;

@ExtendWith(MockitoExtension.class)
@MockitoSettings(strictness = Strictness.LENIENT)
public class ProxyTest {

@Mock
@@ -72,6 +76,8 @@ public class ProxyTest {
private AccessTokenValidator accessTokenValidator;
@Mock
private BlobStorage storage;
@Mock
private ResourceService resourceService;

@Mock(answer = Answers.RETURNS_DEEP_STUBS)
private HttpServerRequest request;
@@ -84,6 +90,7 @@ public class ProxyTest {

@BeforeEach
public void beforeEach() {
when(resourceService.getMaxSize()).thenReturn(67108864);
when(request.response()).thenReturn(response);
when(request.getHeader(HttpHeaders.ACCESS_CONTROL_REQUEST_METHOD)).thenReturn(null);
when(request.getHeader(HttpHeaders.ACCESS_CONTROL_REQUEST_HEADERS)).thenReturn(null);
@@ -161,8 +161,46 @@ void testMaxKeySize() {

@Test
void testMaxContentSize() {
Response response = resourceRequest(HttpMethod.PUT, "/folder/big", "1".repeat(1024 * 1024 + 1));
verify(response, 413, "Resource size: 1048577 exceeds max limit: 1048576");
Response response = resourceRequest(HttpMethod.PUT, "/folder/big", "1".repeat(64 * 1024 * 1024 + 1));
verify(response, 413, "Request body is too large");
}
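Note: the expected failure changed from the resource-service message (`Resource size: 1048577 exceeds max limit: 1048576`) to the generic `Request body is too large` because, with this PR, oversized bodies are rejected in `Proxy` against `resources.maxSize` before they ever reach the resource service (see the `handleRequest` change above).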

@Test
void testBigContentSize() {
String template = """
{
"id": "conversation_id",
"name": "display_name",
"model": {"id": "model_id"},
"prompt": "%s",
"temperature": 1,
"folderId": "folder1",
"messages": [],
"selectedAddons": ["R", "T", "G"],
"assistantModelId": "assistantId",
"lastActivityDate": 4848683153
}
""";
String big = template.formatted("0".repeat(4 * 1024 * 1024));
String small = template.formatted("12345");

Response response = resourceRequest(HttpMethod.PUT, "/folder/big", big);
verify(response, 200);

response = resourceRequest(HttpMethod.GET, "/folder/big");
verifyJson(response, 200, big);

response = resourceRequest(HttpMethod.PUT, "/folder/big", small);
verify(response, 200);

response = resourceRequest(HttpMethod.GET, "/folder/big");
verifyJson(response, 200, small);

response = resourceRequest(HttpMethod.DELETE, "/folder/big");
verify(response, 200);

response = resourceRequest(HttpMethod.GET, "/folder/big");
verify(response, 404);
}

@Test
@@ -98,13 +98,13 @@ public static void afterAll() throws IOException {
}

@BeforeEach
public void beforeEach() throws Exception {
public void beforeEach() {
RKeys keys = redissonClient.getKeys();
for (String key : keys.getKeys()) {
keys.delete(key);
}
LockService lockService = new LockService(redissonClient, null);
ResourceService.Settings settings = new ResourceService.Settings(1048576, 60000, 120000, 4096, 300000, 256);
ResourceService.Settings settings = new ResourceService.Settings(64 * 1048576, 1048576, 60000, 120000, 4096, 300000, 256);
ResourceService resourceService = new ResourceService(mock(TimerService.class), redissonClient, blobStorage,
lockService, settings, null);
rateLimiter = new RateLimiter(vertx, resourceService);
@@ -94,7 +94,7 @@ public void beforeEach() {
keys.delete(key);
}
LockService lockService = new LockService(redissonClient, null);
ResourceService.Settings settings = new ResourceService.Settings(1048576, 60000, 120000, 4096, 300000, 256);
ResourceService.Settings settings = new ResourceService.Settings(64 * 1048576, 1048576, 60000, 120000, 4096, 300000, 256);
ResourceService resourceService = new ResourceService(mock(TimerService.class), redissonClient, blobStorage,
lockService, settings, null);
store = new ApiKeyStore(resourceService, vertx);
@@ -92,7 +92,7 @@ public void beforeEach() {
keys.delete(key);
}
LockService lockService = new LockService(redissonClient, null);
ResourceService.Settings settings = new ResourceService.Settings(1048576, 60000, 120000, 4096, 300000, 256);
ResourceService.Settings settings = new ResourceService.Settings(64 * 1048576, 1048576, 60000, 120000, 4096, 300000, 256);
ResourceService resourceService = new ResourceService(mock(TimerService.class), redissonClient, blobStorage,
lockService, settings, null);
tracker = new TokenStatsTracker(vertx, resourceService);
server/src/test/resources/aidial.settings.json (3 changes: 2 additions & 1 deletion)
@@ -51,7 +51,8 @@
"key": "salt"
},
"resources": {
"maxSize" : 1048576,
"maxSize" : 67108864,
"cacheMaxSize": 1048576,
"syncPeriod": 60000,
"syncDelay": 120000,
"syncBatch": 4096,
server/src/test/resources/response.txt (7 changes: 1 addition & 6 deletions)
@@ -12,10 +12,5 @@
"content": "As an AI language model, I do not have emotions like humans. However, I am functioning well and ready to assist you. How can I help you today?"
}
}
],
"usage": {
"completion_tokens": 33,
"prompt_tokens": 19,
"total_tokens": 52
}
]
}
@@ -90,6 +90,7 @@ public class ResourceService implements AutoCloseable {
private final ResourceTopic topic;
@Getter
private final int maxSize;
private final int cacheMaxSize;
private final TimerService.Timer syncTimer;
private final long syncDelay;
private final int syncBatch;
@@ -109,6 +110,7 @@ public ResourceService(TimerService timerService,
this.lockService = lockService;
this.topic = new ResourceTopic(redis, "resource:" + BlobStorageUtil.toStoragePath(prefix, "topic"));
this.maxSize = settings.maxSize;
this.cacheMaxSize = settings.cacheMaxSize();
this.syncDelay = settings.syncDelay;
this.syncBatch = settings.syncBatch;
this.cacheExpiration = Duration.ofMillis(settings.cacheExpiration);
@@ -349,7 +351,7 @@ public ResourceStream getResourceStream(ResourceDescriptor resource, EtagHeader
String contentType = metadata.getContentMetadata().getContentType();
Long length = metadata.getContentMetadata().getContentLength();

if (length <= maxSize) {
if (length <= cacheMaxSize) {
result = blobToResult(blob, metadata);
redisPut(key, result);
return ResourceStream.fromResult(result, etagHeader);
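In the read path above, the gate for populating Redis switches from `maxSize` to the new `cacheMaxSize`, so only resources up to 1 MiB (by default) are cached while resources up to the 64 MiB `maxSize` remain readable. A rough, self-contained sketch of that decision; `BlobStore` and `RedisCache` below are hypothetical stand-ins, and only the `cacheMaxSize` comparison mirrors the diff:

```java
import java.io.ByteArrayInputStream;
import java.io.InputStream;

// Illustrative sketch of the read-path caching decision; not the project's actual classes.
final class CachingReader {
    interface BlobStore { byte[] readAllBytes(String key); InputStream openStream(String key); }
    interface RedisCache { void put(String key, byte[] body); }

    private final BlobStore blobStore;
    private final RedisCache redisCache;
    private final long cacheMaxSize; // resources.cacheMaxSize, 1048576 by default

    CachingReader(BlobStore blobStore, RedisCache redisCache, long cacheMaxSize) {
        this.blobStore = blobStore;
        this.redisCache = redisCache;
        this.cacheMaxSize = cacheMaxSize;
    }

    InputStream read(String key, long blobLength) {
        if (blobLength <= cacheMaxSize) {
            byte[] body = blobStore.readAllBytes(key); // small resource: load it fully
            redisCache.put(key, body);                 // and keep a hot copy in Redis
            return new ByteArrayInputStream(body);
        }
        // Larger resources (still within resources.maxSize) are served without
        // populating the cache; in this sketch they are streamed from blob storage.
        return blobStore.openStream(key);
    }
}
```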
@@ -391,7 +393,7 @@ private ResourceItemMetadata putResource(
String newEtag = EtagBuilder.generateEtag(body);
Result result = new Result(body, newEtag, createdAt, updatedAt, contentType,
descriptor.getType().requireCompression(), (long) body.length, descriptor.getType().name(), false);
if (body.length <= maxSize) {
if (body.length <= cacheMaxSize) {
redisPut(redisKey, result);
if (metadata == null) {
String blobKey = blobKey(descriptor);
@@ -866,6 +868,7 @@ public record MultipartData(

/**
* @param maxSize - max allowed size in bytes for a resource.
* @param cacheMaxSize - max size in bytes to cache resource in Redis.
* @param syncPeriod - period in milliseconds, how frequently check for resources to sync.
* @param syncDelay - delay in milliseconds for a resource to be written back in object storage after last modification.
* @param syncBatch - how many resources to sync in one go.
@@ -875,6 +878,7 @@ public record MultipartData(
@JsonIgnoreProperties(ignoreUnknown = true)
public record Settings(
int maxSize,
int cacheMaxSize,
long syncPeriod,
long syncDelay,
int syncBatch,
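Because `Settings` is a record, inserting `cacheMaxSize` as its second component changes the canonical constructor, which is why every test fixture above widens its constructor call. Condensed from the test setup shown earlier in this diff; the last two arguments belong to record components hidden by the collapsed part of this excerpt:

```java
// Mirrors the updated test fixtures; comments map arguments to the settings keys above.
ResourceService.Settings settings = new ResourceService.Settings(
        64 * 1048576, // maxSize: 64 MiB, hard limit for a resource / request body
        1048576,      // cacheMaxSize: 1 MiB, upper bound for caching a resource in Redis
        60000,        // syncPeriod
        120000,       // syncDelay
        4096,         // syncBatch
        300000,       // component not shown in this excerpt
        256);         // component not shown in this excerpt
```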