{
  "$schema": "./config.schema.json",
  "custom_adapters": [
    {
      "name": "tesseract",
      "version": 1,
      "description": "Использует Tesseract OCR для распознавания текста на изображениях",
      "extensions": [
        "jpg",
        "jpeg",
        "jpe",
        "png",
        "webp",
        "gif",
        "tif",
        "tiff",
        "jp2",
        "j2k",
        "jpf",
        "jpm",
        "jpg2",
        "jpg2000",
        "jpeg2000",
        "j2c",
        "jpc",
        "jpx",
        "bmp",
        "pnm"
      ],
      "mimetypes": [
        "image/jpeg",
        "image/png",
        "image/webp",
        "image/gif",
        "image/tiff",
        "image/jp2",
        "image/jpx",
        "image/jpm",
        "image/bmp",
        "image/x-portable-anymap"
      ],
      "binary": "tesseract",
      "args": ["--psm", "1", "-l", "eng+rus+ukr", "-", "-"],
      "disabled_by_default": false,
      "match_only_by_mime": false
    },
    {
      "name": "antiword",
      "version": 1,
      "description": "Использует antiword для извлечения текста из файлов DOC",
      "extensions": ["doc"],
      "mimetypes": ["application/msword"],
      "binary": "antiword",
      "args": ["-"],
      "disabled_by_default": false,
      "match_only_by_mime": false,
      "output_path_hint": "${input_virtual_path}.txt"
    }
  ]
}