-
Notifications
You must be signed in to change notification settings - Fork 16
/
encodec.h
184 lines (167 loc) · 6.99 KB
/
encodec.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
/*
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Pierre-Antoine Bannier │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
/*
* This file contains the declarations of the structs and functions used in the encodec library.
* The library provides functionality for audio compression and decompression using a custom model.
* The model consists of an encoder, a quantizer and a decoder, each with their own set of parameters.
* The library also provides functions for loading and freeing the model, as well as compressing and decompressing audio data.
*
*/
#pragma once
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml.h"
#ifdef __cplusplus
extern "C" {
#endif
// Opaque context type: it is only forward-declared in this header, so its layout is not
// visible here. It is passed by pointer to the encodec_* functions declared in this file.
struct encodec_context;
// Timing statistics associated with an encodec context.
// A pointer to this struct is returned by encodec_get_statistics().
struct encodec_statistics {
// The time taken to load the model.
// NOTE(review): presumably expressed in microseconds, per the `_us` suffix -- confirm.
int64_t t_load_us;
// The time taken to compute the model.
// NOTE(review): presumably expressed in microseconds, per the `_us` suffix -- confirm.
int64_t t_compute_us;
};
/**
 * Loads an encodec model from the specified file path.
 *
 * The returned context is released with encodec_free().
 *
 * @param model_path   The file path to the encodec model.
 * @param offset       The offset (in bytes) to the start of the model in the file.
 *                     The parameter is an `int`, so offsets larger than INT_MAX
 *                     cannot be expressed.
 * @param n_gpu_layers The number of GPU layers to use.
 * @return A pointer to the encodec context struct.
 *         NOTE(review): the value returned on failure (presumably NULL) is not
 *         stated in this header -- confirm against the implementation.
 */
struct encodec_context *encodec_load_model(
const char *model_path,
const int offset,
int n_gpu_layers);
/**
 * Sets the target bandwidth for the given encodec context.
 *
 * @param ectx      The encodec context to set the target bandwidth for.
 * @param bandwidth The target bandwidth to set, in bits per second.
 *                  NOTE(review): the reference EnCodec model expresses target
 *                  bandwidths in kbps (e.g. 6, 12, 24) -- confirm the unit
 *                  actually expected here against the implementation.
 */
void encodec_set_target_bandwidth(
struct encodec_context *ectx,
int bandwidth);
/**
 * Sets the sample rate for the given encodec context.
 *
 * @param ectx        The encodec context to set the sample rate for.
 * @param sample_rate The sample rate to set.
 *                    NOTE(review): presumably in Hz -- confirm.
 */
void encodec_set_sample_rate(
struct encodec_context *ectx,
int sample_rate);
/**
 * Reconstructs audio from raw audio data using the specified encodec context.
 *
 * This function returns only a status flag.
 * NOTE(review): the reconstructed samples are presumably retrieved afterwards with
 * encodec_get_audio() and encodec_get_audio_size() -- confirm.
 *
 * @param ectx      The encodec context to use for reconstruction.
 * @param raw_audio The raw audio data to reconstruct (read-only input buffer).
 * @param n_samples The number of samples in the raw audio buffer.
 * @param n_threads The number of threads to use for reconstruction.
 * @return True if the reconstruction was successful, false otherwise.
 */
bool encodec_reconstruct_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
 * Compresses audio data using the specified encodec context.
 *
 * This function returns only a status flag.
 * NOTE(review): the resulting codes are presumably retrieved afterwards with
 * encodec_get_codes() and encodec_get_codes_size() -- confirm.
 *
 * @param ectx      The encodec context to use for compression.
 * @param raw_audio The raw audio data to compress (read-only input buffer).
 * @param n_samples The number of samples in the raw audio buffer.
 * @param n_threads The number of threads to use for compression.
 * @return True if the compression was successful, false otherwise.
 */
bool encodec_compress_audio(
struct encodec_context *ectx,
const float *raw_audio,
const int n_samples,
int n_threads);
/**
 * Decompresses audio data using the specified encodec context.
 *
 * This function returns only a status flag.
 * NOTE(review): the decompressed samples are presumably retrieved afterwards with
 * encodec_get_audio() and encodec_get_audio_size() -- confirm.
 *
 * @param ectx      The encodec context to use for decompression.
 * @param codes     The compressed audio data to decompress (read-only input buffer).
 * @param n_codes   The number of codes in the codes buffer.
 * @param n_threads The number of threads to use for decompression.
 * @return True if the audio data was successfully decompressed, false otherwise.
 */
bool encodec_decompress_audio(
struct encodec_context *ectx,
const int32_t *codes,
const int n_codes,
int n_threads);
/**
 * Gets the audio data from the given encodec context.
 *
 * @param ectx The encodec context to get the audio data from.
 * @return A pointer to the audio data.
 *         NOTE(review): the buffer is presumably owned by the context (see
 *         encodec_free()) and should not be freed by the caller -- confirm.
 */
float * encodec_get_audio(
struct encodec_context *ectx);
/**
 * Gets the size of the audio data from the given encodec context.
 *
 * @param ectx The encodec context to get the audio size from.
 * @return The size of the audio data.
 *         NOTE(review): the unit is not stated here; presumably the number of
 *         float samples in the buffer returned by encodec_get_audio(), not a
 *         byte count -- confirm.
 */
int encodec_get_audio_size(
struct encodec_context *ectx);
/**
 * Gets the code data from the given encodec context.
 *
 * @param ectx The encodec context to get the code data from.
 * @return A pointer to the code data.
 *         NOTE(review): the buffer is presumably owned by the context (see
 *         encodec_free()) and should not be freed by the caller -- confirm.
 */
int32_t * encodec_get_codes(
struct encodec_context *ectx);
/**
 * Gets the size of the code data from the given encodec context.
 *
 * @param ectx The encodec context to get the code size from.
 * @return The size of the code data.
 *         NOTE(review): the unit is not stated here; presumably the number of
 *         int32_t codes in the buffer returned by encodec_get_codes(), not a
 *         byte count -- confirm.
 */
int encodec_get_codes_size(
struct encodec_context *ectx);
/**
 * Gets the statistics for the given encodec context.
 *
 * The result is const-qualified, so callers cannot modify the statistics through
 * the returned pointer; use encodec_reset_statistics() to reset them.
 *
 * @param ectx The encodec context to get the statistics for.
 * @return A pointer to the statistics struct (see struct encodec_statistics).
 */
const struct encodec_statistics* encodec_get_statistics(
struct encodec_context *ectx);
/**
 * Resets the statistics for the given encodec context.
 *
 * NOTE(review): presumably sets the fields of struct encodec_statistics
 * (t_load_us, t_compute_us) back to zero -- confirm against the implementation.
 *
 * @param ectx The encodec context to reset the statistics for.
 */
void encodec_reset_statistics(
struct encodec_context *ectx);
/**
 * Frees the memory allocated for an encodec context.
 *
 * NOTE(review): whether a NULL pointer is accepted is not stated in this header;
 * presumably the context (and any buffers obtained from it) must not be used
 * after this call -- confirm against the implementation.
 *
 * @param ectx The encodec context to free.
 */
void encodec_free(
struct encodec_context *ectx);
#ifdef __cplusplus
}
#endif