|
import asyncio |
|
|
|
from metagpt.config2 import config |
|
from metagpt.const import EXAMPLE_DATA_PATH |
|
from metagpt.logs import logger |
|
from metagpt.rag.parsers import OmniParse |
|
from metagpt.rag.schema import OmniParseOptions, OmniParseType, ParseResultType |
|
from metagpt.utils.omniparse_client import OmniParseClient |
|
|
|
# Sample inputs for the examples below, all located under EXAMPLE_DATA_PATH:
# one file per media kind (DOCX, PDF, video, audio).
TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"
|
|
|
|
|
async def omniparse_client_example():
    """Send each sample file through ``OmniParseClient`` and log the results.

    The client is built from ``config.omniparse.base_url``. The DOCX sample is
    passed as raw bytes together with a ``bytes_filename``; the PDF, video and
    audio samples are passed by path.
    """
    omni_client = OmniParseClient(base_url=config.omniparse.base_url)

    # DOCX: read the file here and hand over its bytes; the file name is
    # supplied separately through ``bytes_filename``.
    with open(TEST_DOCX, "rb") as docx_file:
        docx_bytes = docx_file.read()
    docx_result = await omni_client.parse_document(file_input=docx_bytes, bytes_filename="test_01.docx")
    logger.info(docx_result)

    # PDF, video and audio: the path itself is given as ``file_input``.
    logger.info(await omni_client.parse_pdf(file_input=TEST_PDF))
    logger.info(await omni_client.parse_video(file_input=TEST_VIDEO))
    logger.info(await omni_client.parse_audio(file_input=TEST_AUDIO))
|
|
|
|
|
async def omniparse_example():
    """Load the sample files through the ``OmniParse`` parser and log the output.

    First a single PDF is loaded via ``load_data``; then the parser's
    ``parse_type`` is switched to ``OmniParseType.DOCUMENT`` and the DOCX and
    PDF samples are loaded together via ``aload_data``.
    """
    pdf_options = OmniParseOptions(
        parse_type=OmniParseType.PDF,
        result_type=ParseResultType.MD,
        max_timeout=120,
        num_workers=3,
    )
    omni_parser = OmniParse(
        api_key=config.omniparse.api_key,
        base_url=config.omniparse.base_url,
        parse_options=pdf_options,
    )

    # NOTE(review): ``load_data`` is called without ``await`` from inside a
    # coroutine -- confirm it is safe to call while an event loop is running.
    single_result = omni_parser.load_data(file_path=TEST_PDF)
    logger.info(single_result)

    # Re-target the same parser at DOCUMENT before loading the mixed batch.
    omni_parser.parse_type = OmniParseType.DOCUMENT
    batch_result = await omni_parser.aload_data(file_path=[TEST_DOCX, TEST_PDF])
    logger.info(batch_result)
|
|
|
|
|
async def main():
    """Run the client example, then the parser example, one after the other."""
    for run_example in (omniparse_client_example, omniparse_example):
        await run_example()
|
|
|
|
|
# Script entry point: drive the async examples only when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
|