OSI - Open Semantic Interchange Ontology Definitions

Added on June 11, 2026 · Managed by admin

OSI core schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://github.com/open-semantic-interchange/OSI/core-spec/osi-schema.json",
  "title": "OSI Core Metadata Specification",
  "description": "JSON Schema for validating OSI (Open Semantic Interchange) semantic model definitions",
  "type": "object",
  "properties": {
    "version": {
      "type": "string",
      "const": "0.2.0.dev0",
      "description": "OSI specification version"
    },
    "semantic_model": {
      "type": "array",
      "description": "Collection of semantic model definitions",
      "items": {
        "$ref": "#/$defs/SemanticModel"
      }
    }
  },
  "required": ["version", "semantic_model"],
  "additionalProperties": false,
  "$defs": {
    "Dialect": {
      "type": "string",
      "enum": ["ANSI_SQL", "SNOWFLAKE", "MDX", "TABLEAU", "DATABRICKS", "MAQL"],
      "description": "Supported SQL and expression language dialects"
    },
    "Vendor": {
      "type": "string",
      "examples": ["COMMON", "SNOWFLAKE", "SALESFORCE", "DBT", "DATABRICKS", "GOODDATA"],
      "description": "Vendor name for custom extensions. Any string value is accepted."
    },
    "AIContext": {
      "description": "Additional context for AI tools",
      "oneOf": [
        {
          "type": "string"
        },
        {
          "type": "object",
          "properties": {
            "instructions": {
              "type": "string",
              "description": "Instructions for AI on how to use this entity"
            },
            "synonyms": {
              "type": "array",
              "items": {
                "type": "string"
              },
              "description": "Alternative names and terms"
            },
            "examples": {
              "type": "array",
              "items": {
                "type": "string"
              },
              "description": "Sample questions or use cases"
            }
          },
          "additionalProperties": true
        }
      ]
    },
    "CustomExtension": {
      "type": "object",
      "description": "Vendor-specific attributes for extensibility",
      "properties": {
        "vendor_name": {
          "$ref": "#/$defs/Vendor"
        },
        "data": {
          "type": "string",
          "description": "JSON string containing vendor-specific data"
        }
      },
      "required": ["vendor_name", "data"],
      "additionalProperties": false
    },
    "DialectExpression": {
      "type": "object",
      "description": "Expression in a specific dialect",
      "properties": {
        "dialect": {
          "$ref": "#/$defs/Dialect"
        },
        "expression": {
          "type": "string",
          "description": "SQL or dialect-specific expression"
        }
      },
      "required": ["dialect", "expression"],
      "additionalProperties": false
    },
    "Expression": {
      "type": "object",
      "description": "Expression definition with multi-dialect support",
      "properties": {
        "dialects": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/DialectExpression"
          },
          "minItems": 1
        }
      },
      "required": ["dialects"],
      "additionalProperties": false
    },
    "Dimension": {
      "type": "object",
      "description": "Dimension metadata",
      "properties": {
        "is_time": {
          "type": "boolean",
          "description": "Indicates if this is a time-based dimension for temporal filtering"
        }
      },
      "additionalProperties": false
    },
    "Field": {
      "type": "object",
      "description": "Row-level attribute for grouping, filtering, and metric expressions",
      "properties": {
        "name": {
          "type": "string",
          "description": "Unique identifier for the field within the dataset"
        },
        "expression": {
          "$ref": "#/$defs/Expression"
        },
        "dimension": {
          "$ref": "#/$defs/Dimension"
        },
        "label": {
          "type": "string",
          "description": "Label for categorization"
        },
        "description": {
          "type": "string",
          "description": "Human-readable description"
        },
        "ai_context": {
          "$ref": "#/$defs/AIContext"
        },
        "custom_extensions": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/CustomExtension"
          }
        }
      },
      "required": ["name", "expression"],
      "additionalProperties": false
    },
    "Dataset": {
      "type": "object",
      "description": "Logical dataset representing a business entity (fact or dimension table)",
      "properties": {
        "name": {
          "type": "string",
          "description": "Unique identifier for the dataset"
        },
        "source": {
          "type": "string",
          "description": "Reference to underlying physical table/view (database.schema.table) or query"
        },
        "primary_key": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Primary key columns (single or composite)"
        },
        "unique_keys": {
          "type": "array",
          "items": {
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "description": "Array of unique key definitions (each can be single or composite)"
        },
        "description": {
          "type": "string",
          "description": "Human-readable description"
        },
        "ai_context": {
          "$ref": "#/$defs/AIContext"
        },
        "fields": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/Field"
          }
        },
        "custom_extensions": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/CustomExtension"
          }
        }
      },
      "required": ["name", "source"],
      "additionalProperties": false
    },
    "Relationship": {
      "type": "object",
      "description": "Foreign key relationship between datasets",
      "properties": {
        "name": {
          "type": "string",
          "description": "Unique identifier for the relationship"
        },
        "from": {
          "type": "string",
          "description": "Dataset on the many side of the relationship"
        },
        "to": {
          "type": "string",
          "description": "Dataset on the one side of the relationship"
        },
        "from_columns": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "minItems": 1,
          "description": "Foreign key columns in the 'from' dataset"
        },
        "to_columns": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "minItems": 1,
          "description": "Primary/unique key columns in the 'to' dataset"
        },
        "ai_context": {
          "$ref": "#/$defs/AIContext"
        },
        "custom_extensions": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/CustomExtension"
          }
        }
      },
      "required": ["name", "from", "to", "from_columns", "to_columns"],
      "additionalProperties": false
    },
    "Metric": {
      "type": "object",
      "description": "Quantitative measure defined on business data",
      "properties": {
        "name": {
          "type": "string",
          "description": "Unique identifier for the metric"
        },
        "expression": {
          "$ref": "#/$defs/Expression"
        },
        "description": {
          "type": "string",
          "description": "Human-readable description of what the metric measures"
        },
        "ai_context": {
          "$ref": "#/$defs/AIContext"
        },
        "custom_extensions": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/CustomExtension"
          }
        }
      },
      "required": ["name", "expression"],
      "additionalProperties": false
    },
    "SemanticModel": {
      "type": "object",
      "description": "Top-level container representing a complete semantic model",
      "properties": {
        "name": {
          "type": "string",
          "description": "Unique identifier for the semantic model"
        },
        "description": {
          "type": "string",
          "description": "Human-readable description"
        },
        "ai_context": {
          "$ref": "#/$defs/AIContext"
        },
        "datasets": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/Dataset"
          },
          "minItems": 1,
          "description": "Collection of logical datasets"
        },
        "relationships": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/Relationship"
          },
          "description": "Defines how datasets are connected"
        },
        "metrics": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/Metric"
          },
          "description": "Quantifiable measures spanning datasets"
        },
        "custom_extensions": {
          "type": "array",
          "items": {
            "$ref": "#/$defs/CustomExtension"
          }
        }
      },
      "required": ["name", "datasets"],
      "additionalProperties": false
    }
  }
}

View source