Inspect image

Inspects an image and extracts its attributes. The attributes can be used both to filter out or warn about bad training set images and as characteristics to be added to the fine tune.

Parameters

`name` (required)

Class name of the object to be inspected in the image. This should be the same class name that will be used to create the tune object.

`file` (required)

The image to inspect.

`file_url` (optional)

The URL of the image to inspect, used when `file` is not provided.

Returns

The response is a JSON object that conforms to the schema below. Some attributes may be omitted from the response.

{
  "type": "object",
  "properties": {
    "name": {
      "type": "string",
      "enum": [
        "man",
        "woman",
        "boy",
        "girl",
        "baby",
        "cat",
        "dog",
        "NONE"
      ]
    },
    "body_type": {
      "type": "string",
      "enum": [
        "slim body",
        "average body",
        "muscular body",
        "plussize body",
        "NONE"
      ]
    },
    "ethnicity": {
      "type": "string",
      "enum": [
        "caucasian",
        "black",
        "hispanic",
        "korean",
        "japanese",
        "chinese",
        "philippine",
        "asian",
        "indian",
        "arabic",
        "caribbean",
        "african",
        "NONE"
      ]
    },
    "age": {
      "type": "string",
      "enum": [
        "20 yo",
        "30 yo",
        "40 yo",
        "50 yo",
        "60 yo",
        "70 yo"
      ]
    },
    "glasses": {
      "type": "string",
      "enum": [
        "glasses",
        "NONE"
      ]
    },
    "eye_color": {
      "type": "string",
      "enum": [
        "blue eyes",
        "brown eyes",
        "green eyes",
        "gray eyes",
        "black eyes",
        "NONE"
      ]
    },
    "hair_color": {
      "type": "string",
      "enum": [
        "black hair",
        "brown hair",
        "blonde hair",
        "red hair",
        "grey hair",
        "white hair",
        "purple hair",
        "blue hair",
        "green hair",
        "pink hair",
        "dyed hair",
        "NONE"
      ]
    },
    "hair_length": {
      "type": "string",
      "enum": [
        "bald",
        "short hair",
        "medium hair",
        "long hair",
        "very long hair",
        "NONE"
      ]
    },
    "hair_style": {
      "type": "string",
      "enum": [
        "bald head",
        "balding top",
        "straight hair",
        "wavy hair",
        "curly hair",
        "afro",
        "dreadlocks",
        "cornrows",
        "slicked back",
        "comb over",
        "receding hairline",
        "undercut",
        "man bun",
        "mohawk",
        "crew cut",
        "faux hawk",
        "buzzcut",
        "NONE"
      ]
    },
    "facial_hair": {
      "type": "string",
      "enum": [
        "mustache",
        "beard",
        "goatee",
        "NONE"
      ]
    },
    "is_bald": {
      "type": "string",
      "enum": [
        "bald",
        "NONE"
      ]
    },
    "headcover": {
      "type": "string",
      "enum": [
        "with head cover",
        "NONE"
      ]
    },
    "funny_face": {
      "type": "boolean"
    },
    "wearing_sunglasses": {
      "type": "boolean"
    },
    "wearing_hat": {
      "type": "boolean"
    },
    "blurry": {
      "type": "boolean"
    },
    "includes_multiple_people": {
      "type": "boolean"
    },
    "full_body_image_or_longshot": {
      "type": "boolean"
    },
    "selfie": {
      "type": "boolean"
    },
    "low_resolution": {
      "type": "boolean"
    },
    "low_quality": {
      "type": "boolean"
    }
  }
}

POST /images/inspect

cURL
Python
Node.js

# Upload a local image together with its class name for inspection.
curl --request POST "https://api.astria.ai/images/inspect" \
  --header "Authorization: Bearer YOUR_API_KEY" \
  --form "name=man" \
  --form "file=@/path/to/your/image.jpg"

import requests

url = "https://api.astria.ai/images/inspect"
headers = {
    "Authorization": "Bearer YOUR_API_KEY"
}
data = {
    "name": "man"
}

# Open the image in a context manager so the file handle is closed
# once the upload has finished (or failed).
with open("/path/to/your/image.jpg", "rb") as image_file:
    files = {
        "file": image_file
    }
    response = requests.post(url, headers=headers, files=files, data=data)

print(response.json())

const fs = require('fs');
const axios = require('axios');
const FormData = require('form-data');

const inspectUrl = "https://api.astria.ai/images/inspect";

// Multipart body: the class name plus the image streamed from disk.
const payload = new FormData();
payload.append("name", "man");
payload.append("file", fs.createReadStream("/path/to/your/image.jpg"));

// The multipart headers (boundary included) come from the form itself.
const requestHeaders = {
    "Authorization": "Bearer YOUR_API_KEY",
    ...payload.getHeaders()
};

// Sends the request and prints either the parsed response body or the error.
async function inspectImage() {
    try {
        const result = await axios.post(inspectUrl, payload, { headers: requestHeaders });
        console.log(result.data);
    } catch (err) {
        console.error(err);
    }
}

inspectImage();

Response

{
  "age": "20 yo",
  "blurry": false,
  "ethnicity": "caucasian",
  "eye_color": "brown eyes",
  "facial_hair": "",
  "full_body_image_or_longshot": false,
  "funny_face": false,
  "glasses": "",
  "hair_color": "brunette",
  "hair_length": "medium hair",
  "hair_style": "wavy hair",
  "includes_multiple_people": false,
  "is_bald": "",
  "name": "woman",
  "selfie": true,
  "wearing_hat": false,
  "wearing_sunglasses": false
}

Example implementation

The image inspect API is meant to be called from the client side of your app. Requests to /images/inspect need to be proxied through your server so that your API key is not exposed to the client.

The example below implements two behaviors:

createWarning notifies the user when an attribute such as funny_face or wearing_sunglasses is true. The inspect function expects the form to contain an input with the class name selected by the user.
aggregateCharacteristics picks the most common value for each key across the collected characteristics objects and sets the characteristicsInputTarget value to the aggregated characteristics as JSON.

  async inspect(previewEl, file) {
    const form = document.getElementById('new_tune');
    const formValues = Object.fromEntries(new FormData(form));
    const name = formValues['tune[name]'];
    const csrfToken = document.querySelector("[name='csrf-token']").content;
    const formData = new FormData();
    formData.append('authenticity_token', csrfToken);
    formData.append('name', name);
  
    // Check if file is an image and readable
    if (file.type.startsWith('image/')) {
      try {
        const resizedFile = await this.resizeImage(file);
        formData.append('file', resizedFile || file);
      } catch (error) {
        console.warn('Image resizing failed, uploading original file:', error);
        formData.append('file', file);
      }
    } else {
      formData.append('file', file);
    }
  
    const response = await fetch('/images/inspect', {
      method: 'POST',
      body: formData,
    });
    const data = await response.json();
    if (!data['name']) {
      this.createWarning(previewEl, `Could not detect image`);
    }
  
    // Iterate over hash and add warning messages for each true value
    Object.keys(data).forEach((key) => {
      if (key === 'name') {
        if (data[key] === '') {
          this.createWarning(previewEl, `Could not detect ${name} in the image`);
        } else if (data[key] && data[key] !== name) {
          this.createWarning(previewEl, `Could not detect ${name} in the image (2)`);
        }
      } else if (data[key] === true) {
        const warning = capitalizeFirstLetter(key.replace(/_/g, " "));
        this.createWarning(previewEl, warning);
      }
    });
  
    this.characteristics.push(data);
    this.aggregateCharacteristics();
    previewEl.querySelector('.loading').classList.add('d-none');
    previewEl.querySelector('.remove-btn').classList.remove('d-none');
  }
  
  // Helper function to resize the image
  async resizeImage(file) {
    return new Promise((resolve, reject) => {
      const img = new Image();
      const reader = new FileReader();
  
      reader.onload = (e) => {
        img.onload = () => {
          const canvas = document.createElement('canvas');
          const maxDimension = 512; // Set max dimension for resizing
          let width = img.width;
          let height = img.height;
  
          if (width <= maxDimension && height <= maxDimension) {
            console.log(`Image is already smaller than ${maxDimension}x${maxDimension}`)
            resolve(file);
            return;
          }
          // Calculate new dimensions while maintaining aspect ratio
          if (width > height) {
            if (width > maxDimension) {
              height = Math.round(height * maxDimension / width);
              width = maxDimension;
            }
          } else {
            if (height > maxDimension) {
              width = Math.round(width * maxDimension / height);
              height = maxDimension;
            }
          }
          console.log(`Resizing image to ${width}x${height} from ${img.width}x${img.height}`)
  
          canvas.width = width;
          canvas.height = height;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0, width, height);
  
          canvas.toBlob((blob) => {
            resolve(blob ? new File([blob], file.name, { type: file.type }) : null);
          }, file.type, 0.9); // Adjust quality if needed
        };
  
        img.onerror = reject;
        img.src = e.target.result;
      };
  
      reader.onerror = reject;
      reader.readAsDataURL(file);
    });
  }


  aggregateCharacteristics() {
    const aggregated = {};
    // Iterate over this.characteristics and select value which is more common into this.aggregatedCharacteristics
    // use only the characeteristics that are string
    this.characteristics.forEach((characteristic) => {
      Object.keys(characteristic).forEach((key) => {
        if (typeof characteristic[key] === 'string') {
          if (aggregated[key]) {
            aggregated[key].push(characteristic[key]);
          } else {
            aggregated[key] = [characteristic[key]];
          }
        }
      });
    });
    console.log('aggregated', aggregated);

    const commonValues = {};
    // find most common value for each key and set aggregatedCharacteristics to that value
    Object.keys(aggregated).forEach((key) => {
      const values = aggregated[key];

      // only set the value if a high enough percentage was detected for a value
      // console.log(key, 'values.length', values.length, this.characteristics.length, aggregated[key]);
      if (values.length < this.characteristics.length / 2) {
        console.log('Not enough values for', key);
        return
      }

      // Now aggregate and find the most common value
      const mostCommonValue = values.sort((a, b) =>
        values.filter(v => v === a).length - values.filter(v => v === b).length
      ).pop();
      commonValues[key] = mostCommonValue;
    });
    console.log('commonValues', commonValues);
    this.characteristicsInputTarget.value = JSON.stringify(commonValues);
  }
  

Parameters

name (required)

file (required)

file_url (optional)

Returns

POST /images/inspect

Response

Example implementation