Docs
API
Functions
Text Features

Text Features

This page describes our API endpoints for image-to-text features.

To use any of these endpoints, send a base64-encoded image in the JSON request body.

Image Captioning (/caption_image)

Generate a human-comprehensible caption for an image.

def image_to_base64(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    The file is opened in binary mode, its bytes are base64-encoded, and
    the encoded bytes are decoded as UTF-8 to produce a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')


# Endpoint that generates a caption for the submitted image.
url = "https://gateway.ezml.io/api/v1/functions/caption_image"

# The image travels as a base64 string inside the JSON body.
payload = {
    "image": image_to_base64("<path to image>"),
}
headers = {
    "Authorization": "Bearer <token from /auth>",
}

# Always pass a timeout: without one, requests may wait indefinitely for a
# response. 60 seconds is an arbitrary example value; tune it as needed.
res = requests.post(url, json=payload, headers=headers, timeout=60)
# Surface 4xx/5xx responses as an HTTP error instead of failing later while
# parsing the body.
res.raise_for_status()

caption = res.json()["result"]

Image Tagging (/tag_image)

Generate a list of tags for an image.

def image_to_base64(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    The file is opened in binary mode, its bytes are base64-encoded, and
    the encoded bytes are decoded as UTF-8 to produce a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')


# Endpoint that generates a list of tags for the submitted image.
url = "https://gateway.ezml.io/api/v1/functions/tag_image"

# The image travels as a base64 string inside the JSON body.
payload = {
    "image": image_to_base64("<path to image>"),
}
headers = {
    "Authorization": "Bearer <token from /auth>",
}

# Always pass a timeout: without one, requests may wait indefinitely for a
# response. 60 seconds is an arbitrary example value; tune it as needed.
res = requests.post(url, json=payload, headers=headers, timeout=60)
# Surface 4xx/5xx responses as an HTTP error instead of failing later while
# parsing the body.
res.raise_for_status()

# Each entry in "result" carries a tag and its confidence.
for obj in res.json()["result"]:
    print(f"Tag: {obj['tag']}")
    print(f"Confidence: {obj['conf']}")

Visual Question Answering (/visual_question_answering)

Ask a question about an image and get an answer using our 13B parameter model.

def image_to_base64(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    The file is opened in binary mode, its bytes are base64-encoded, and
    the encoded bytes are decoded as UTF-8 to produce a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def test_vqa():
    """Ask a question about an image and print the endpoint's answer.

    Posts a base64-encoded image together with a text prompt to the
    visual question answering endpoint, then prints the ``"result"``
    field of the JSON response.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
    """
    url = "https://gateway.ezml.io/api/v1/functions/visual_question_answering"
    payload = {
        "image": image_to_base64("<path to image>"),
        "prompt": "Describe this image including the make and model of each vehicle",
    }
    headers = {
        "Authorization": "Bearer <token from /auth>",
    }

    # Always pass a timeout: without one, requests may wait indefinitely for
    # a response. 60 seconds is an arbitrary example value; tune as needed.
    res = requests.post(url, json=payload, headers=headers, timeout=60)
    # Surface 4xx/5xx responses as an HTTP error instead of failing later
    # while parsing the body.
    res.raise_for_status()
    # Keep the decoded body under its own name rather than rebinding `res`
    # from a Response object to a dict.
    body = res.json()

    print(body["result"]) # print the answer to question
Last updated on October 23, 2023