JSON Metadata for External System Mapping

Prev Next

The most important step in an AiR integration is planning how to map the metadata into the external system.

The JSON file includes header information about the job configuration, details on which services ran, and technical metadata about the processed media file. The analysis metadata is contained in the following sections:

  • Speech to Text

  • OCR detections

  • Natural Language Descriptions

  • Logo detections

  • Facial detections

  • Audio classifications

  • PDF extraction

All metadata is time-based (in fractional seconds) for audio and video files, and non-time-based for images and documents. The system supports timecode offsets, where the timecode doesn’t start at 00:00:00. Because the metadata times are relative to the start of the media, you can add the fractional seconds to the media’s start timecode in the external system to accurately reference a specific moment.

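The following examples show the different data structures within the file. Each example is an excerpt from a single JSON document, so it is not complete, valid JSON on its own.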

Job Info

 "job_id": "1ed37889-c49d-4a8f-9542-fe65aca2e0ae", 

 "message": "Processing successful", 
 "message_payload": { 

    "job_metadata": { 

      "job_id": "1ed37889-c49d-4a8f-9542-fe65aca2e0ae", 

      "name": "", 

      "batch": "string", 

      "object_key": "30sec_philip.mp4", 

      "object_path": "https://30secondphil/30sec_philip.mp4", 

      "output_destination": "jobs/2025_06_27/1ed37889-c49d-4a8f-9542-fe65aca2e0ae", 

      "destination_path": "https://s3.eu-west-1.wasabisys.com/metabucket/jobs/2025_06_27/1ed37889-c49d-4a8f-9542-fe65aca2e0ae/30sec_philip_metadata.json", 

      "processing_date": "2025-06-27T16:59:40.984651464Z", 

      "airtime_duration": 32.23, 

      "status": "completed", 

      "engine_version": "dev_v1" 

    }, 

Services

"services": [ 

      { 

        "service_name": "air_s2t", 

        "service_version": "v0.0.1", 

        "service_start": "2025-06-27T16:21:50.345843927Z", 

        "service_end": "2025-06-27T16:22:52.233017095Z", 

        "processing_duration": 61, 

        "status": "completed", 

        "error_message": "" 

      }, 

      { 

        "service_name": "air_audiotag", 

        "service_version": "v0.0.1", 

        "service_start": "2025-06-27T16:21:50.346504386Z", 

        "service_end": "2025-06-27T16:31:19.342151135Z", 

        "processing_duration": 568, 

        "status": "completed", 

        "error_message": "" 

      }, 

      { 

        "service_name": "air_face", 

        "service_version": "v0.0.2", 

        "service_start": "2025-06-27T16:21:58.230930708Z", 

        "service_end": "2025-06-27T16:23:51.914924845Z", 

        "processing_duration": 113, 

        "status": "completed", 

        "error_message": "" 

      }, 

      { 

        "service_name": "air_nld", 

        "service_version": "v0.0.1", 

        "service_start": "2025-06-27T16:21:58.231107958Z", 

        "service_end": "2025-06-27T16:59:40.984324298Z", 

        "processing_duration": 2262, 

        "status": "completed", 

        "error_message": "" 

      }, 

      { 

        "service_name": "air_logo", 

        "service_version": "v0.0.1", 

        "service_start": "2025-06-27T16:21:58.23203725Z", 

        "service_end": "2025-06-27T16:48:04.554999753Z", 

        "processing_duration": 1566, 

        "status": "completed", 

        "error_message": "" 

      } 

    ], 

Technical Metadata

"technical_metadata": { 

      "format": "mov,mp4,m4a,3gp,3g2,mj2", 

      "codec": "h264", 

      "resolution": { 

        "width": 1280, 

        "height": 720 

      }, 

      "framerate": 0, 

      "bitrate": 6254802, 

      "embedded_metadata": { 

        "audio_channels": 2, 

        "duration": 32.234667 

      } 

    }, 

Speech to Text (Audio / Video)

"speech_to_text": [ 

      { 

        "start_time": 0.02, 

        "end_time": 1.42, 

        "confidence_score": 0.84, 

        "text": "was met with mixed responses.", 

        "language": "en" 

      }, 

      { 

        "start_time": 1.42, 

        "end_time": 3.32, 

        "confidence_score": 0.84, 

        "text": "You had some, like Micah McKinnon, saying,", 

        "language": "en" 

      }, 

      { 

        "start_time": 3.32, 

        "end_time": 6.02, 

        "confidence_score": 0.84, 

        "text": "Once again, I'm very much okay with the shift in cultural standards.", 

        "language": "en" 

      }, 

OCR Detections (Video)

"ocr_detections": [ 

      { 

        "start_time": 0, 

        "end_time": 1.969, 

        "detections": [ 

          { 

            "text": "Gillette", 

            "confidence_score": 0.948, 

            "text_box": { 

              "top_left_x": 39, 

              "top_left_y": 103, 

              "top_right_x": 232, 

              "top_right_y": 106, 

              "bottom_right_x": 231, 

              "bottom_right_y": 163, 

              "bottom_left_x": 38, 

              "bottom_left_y": 160 

            } 

          } 

        ] 

      }, 

      { 

        "start_time": 1.969, 

        "end_time": 9.843, 

        "detections": [ 

          { 

            "text": "Mika McKinnon", 

            "confidence_score": 0.991, 

            "text_box": { 

              "top_left_x": 132, 

              "top_left_y": 49, 

              "top_right_x": 264, 

              "top_right_y": 49, 

              "bottom_right_x": 264, 

              "bottom_right_y": 68, 

              "bottom_left_x": 132, 

              "bottom_left_y": 68 

            } 

          }, 

Natural Language Descriptions (Video)

"natural_language_descriptions": [ 

      { 

        "start_time": 0, 

        "end_time": 1.969, 

        "description": "A man is sitting in a room with a black couch behind him. He is wearing a blue collared shirt. There are shelves behind the man with various items on them. There is a blue sign hanging from the wall that says Gillette on it. The man has his hands out in front of him and he is speaking." 

      }, 

      { 

        "start_time": 1.969, 

        "end_time": 7.875, 

        "description": "A woman is sitting on a rock. She is wearing a blue hoodie and blue jeans. There is moss growing on the rocks around her. There are large rocks behind her." 

      }, 

      { 

        "start_time": 7.875, 

        "end_time": 9.843, 

        "description": "A woman is sitting on a rock. She is wearing a blue hoodie and blue jeans. There is moss on the rocks in front of her. There are large rocks behind her." 

      }, 

Logo Detections (Video)

"logo_detections": [ 

      { 

        "start_time": 13.780433, 

        "end_time": 15.749067, 

        "detections": [ 

          { 

            "logo_name": "Marlboro", 

            "confidence_score": 0.442, 

            "bounding_box": { 

              "top_left_x": 66, 

              "top_left_y": 771, 

              "top_right_x": 105, 

              "top_right_y": 771, 

              "bottom_right_x": 105, 

              "bottom_right_y": 886, 

              "bottom_left_x": 66, 

              "bottom_left_y": 886 

            } 

          } 

        ] 

      }, 

      { 

        "start_time": 17.7177, 

        "end_time": 19.686333, 

        "detections": [ 

          { 

            "logo_name": "Marlboro", 

            "confidence_score": 0.469, 

            "bounding_box": { 

              "top_left_x": 66, 

              "top_left_y": 773, 

              "top_right_x": 105, 

              "top_right_y": 773, 

              "bottom_right_x": 105, 

              "bottom_right_y": 886, 

              "bottom_left_x": 66, 

              "bottom_left_y": 886 

            } 

          } 

        ] 

      }, 

Facial Detections (Video)

"facial_detections": [ 

      { 

        "frame_name": "frame_1", 

        "start_time": 0, 

        "end_time": 1, 

        "detections": [ 

          { 

            "face_id": "741f8d25-f36b-4bdf-9d21-18c443a1a172", 

            "face_hash": "212d3a0d87fa70c2c7656591910b41e2aa633cdc370ae341142341159b35abe3", 

            "thumbnail_path": "s3.eu-west-1.wasabisys.com/metabucket/jobs/2025_06_27/completed/1ed37889-c49d-4a8f-9542-fe65aca2e0ae/faces/thumbnails/212d3a0d87fa70c2c7656591910b41e2aa633cdc370ae341142341159b35abe3.jpg", 

            "detection_confidence": 1, 

            "bounding_box": { 

              "top_left_x": 561, 

              "top_left_y": 163, 

              "top_right_x": 766, 

              "top_right_y": 163, 

              "bottom_left_x": 561, 

              "bottom_left_y": 455, 

              "bottom_right_x": 766, 

              "bottom_right_y": 455 

            }, 

            "landmarks": { 

              "left_eye_x": 598, 

              "left_eye_y": 265, 

              "right_eye_x": 696, 

              "right_eye_y": 272, 

              "nose_x": 627, 

              "nose_y": 329, 

              "mouth_left_x": 600, 

              "mouth_left_y": 382, 

              "mouth_right_x": 673, 

              "mouth_right_y": 389 

            }, 

            "attributes": { 

              "age": 32, 

              "gender": "Man", 

              "gender_confidence": 0.9991447925567627, 

              "emotion": "neutral", 

              "emotion_confidence": 0.7127898931503296, 

              "race": "white", 

              "race_confidence": 0.7005689740180969 

            }, 

            "embedding": "-0.14500502354876638 0.05153432825295896 -0.11248243413019478 0.12299327109650665 0.1347590330582926 0.08780777875504941 -0.12616054513571673 0.013852484346137778 0.031691658371990304 -0.023041653883144532 0.11251890176719406 -0.033473387637716366 -0.011176484057793049 -0.006413916697595927 0.07423266418375517 0.02841477958106582 -0.057096282494503034 -0.12318404989328695 -0.24039602617942032 0.012336195282513479 -0.15712812221812433 0.06035230878022846 -0.04348453514380505 -0.027614247843446627 -0.018395153454227122 -0.11392290579166636 0.18036530470815865 0.19491547246159033 -0.13561813005941364 0.014017529762209206 0.012664376491772924 -0.004491145660885925 0.03778364109263548 -0.0955825363641404 -0.03993078069459642 0.08726635807134804 -0.019007699398747923 -0.21556133470780572 -0.09930917656167065 -0.03349994935175559 0.02394026972378551 -0.21149572792921947 -0.031833067453758794 -0.028318000889431733 0.07661237671736679 -0.24116891360279322 0.1398211667742131 -0.02988070676459749 -0.021299588939532807 0.06251143300239934 0.005907301086290135 0.031439994454121566 0.11556509234692522 0.08879520451098191 -0.0327332822922843 -0.04122057695049327 -0.11202700807698497 -0.12167560247256685 0.014450723191554058 -0.022721434896957635 -0.024168433615367697 -0.028021095816662865 0.18417853195179013 0.003872395407975055 0.043232821421084165 -0.05664441093495128 -0.06345949719729237 0.109867501143845 -0.09559391284089261 -0.08199622356503844 -0.11004990224023359 -0.10198490292332861 0.007772598961946866 0.001451410379786556 -0.038783567866455385 0.11659974358791947 0.13189793827958166 0.1013522659635849 0.11162463729784541 0.032520838384822635 -0.05078855039699883 0.09115635294128495 -0.039086635634349706 0.1895244275021303 0.09864445479191608 -0.024831262800740904 0.04497197278084564 0.00257859903605888 0.10165804678526691 -0.10867396718190478 0.008842636550387332 -0.09858416470774534 0.0660151991079064 -0.023614994234452295 0.017026590824669202 
0.07420349426824856 -0.14844097117350535 -0.026443466835010844 -0.057964926299387126 0.07938369693943947 -0.018340045695987052 0.06151842386041103 0.022715290550988233 -0.019041896983012774 0.01591965963667825 -0.006967144407889729 -0.07769826404467414 0.0608698860462943 0.02258647423288741 0.04213740301786182 0.002699261120275608 0.08150561431575298 0.03351248444664709 0.03984376375398198 -0.023887956279881353 0.03765584446334955 -0.051169861587606745 0.007595867099451342 -0.08624294175647362 0.06980922789169151 -0.024765921456039848 -0.06672214490704455 0.12777394972905406 0.06398236426291719 0.13095624910576267 0.01828075039820512 0.1306197150985376 0.1321756606203522" 

          } 

        ] 

     }, 

Audio Classifications (Audio / Video)

"audio_classifications": [ 

      { 

        "start_time": 0, 

        "end_time": 1, 

        "labels": [ 

          { 

            "label": "Speech", 

            "confidence": 0.77 

          }, 

          { 

            "label": "Speech synthesizer", 

            "confidence": 0.42 

          }, 

          { 

            "label": "Narration, monologue", 

            "confidence": 0.12 

          } 

        ] 

      }, 

      { 

        "start_time": 1, 

        "end_time": 2, 

        "labels": [ 

          { 

            "label": "Speech", 

            "confidence": 0.84 

          }, 

          { 

            "label": "Male speech, man speaking", 

            "confidence": 0.18 

          }, 

          { 

            "label": "Narration, monologue", 

            "confidence": 0.18 

          } 

        ] 

      }, 

PDF Extraction (Documents)

"pdf_extraction": []