Full Example
Python Example
Full Example
Python Example
Start parsing your documents in minutes
Full Example
To help get you started with directly calling the AnyParser RT API, here’s a full example.
import base64
import json
import time
from pathlib import Path
from typing import Tuple
import requests
URL = "https://public-api.cambio-ai.com/parse"
TIMEOUT = 30
class AnyParser:
"""AnyParser: Real-time parser for any data format."""
def __init__(self, api_key: str, url: str = URL) -> None:
"""Initialize the AnyParser object.
Args:
api_key (str): The API key for the AnyParser
url (str): The URL of the AnyParser RT API.
Returns:
None
"""
self._url = url
self._api_key = api_key
def parse(self, file_path: str) -> Tuple[str, str]:
"""Parse data in real-time.
Args:
file_path (str): The path to the file to be parsed.
Returns:
tuple(str, str): The parsed data and the time taken.
"""
file_extension = Path(file_path).suffix.lower().lstrip(".")
# Check if the file exists
if not Path(file_path).is_file():
return "Error: File does not exist", "File does not exist"
if file_extension in ["pdf", "docx", "pptx", "png", "jpg", "jpeg"]:
# Encode the PDF file content in base64
with open(file_path, "rb") as file:
encoded_file = base64.b64encode(file.read()).decode("utf-8")
else:
return "Error: Unsupported file type", "Unsupported file type"
# Create the JSON payload
payload = {
"file_content": encoded_file,
"file_type": file_extension,
}
# Set the headers
headers = {
"Content-Type": "application/json",
"x-api-key": self._api_key,
}
# Send the POST request
start_time = time.time()
response = requests.post(
self._url, headers=headers, data=json.dumps(payload), timeout=TIMEOUT
)
end_time = time.time()
# Check if the request was successful
if response.status_code == 200:
try:
response_data = response.json()
response_list = []
for text in response_data["markdown"]:
response_list.append(text)
markdown_text = "\n".join(response_list)
return (
markdown_text,
f"Time Elapsed: {end_time - start_time:.2f} seconds",
)
except json.JSONDecodeError:
return "Error: Invalid JSON response", f"Response: {response.text}"
else:
return f"Error: {response.status_code}", f"Response: {response.text}"
Here’s an implementation of that custom AnyParser
class:
from IPython.display import display, Markdown
ap = AnyParser(api_key="...")
md_output, total_time = ap.parse(file_path="...")
display(Markdown(md_output))
On this page