curl --request GET \
--url https://api.cozmox.ai/workflow \
--header 'Authorization: Bearer <token>'
[
{
"nodes": [
{
"type": "conversation",
"name": "<string>",
"model": {
"provider": "openai",
"model": "gpt-4.1-2025-04-14",
"temperature": 1,
"maxTokens": 5025
},
"transcriber": {
"provider": "assembly-ai",
"language": "en",
"confidenceThreshold": 0.4,
"enableUniversalStreamingApi": false,
"formatTurns": false,
"endOfTurnConfidenceThreshold": 0.7,
"minEndOfTurnSilenceWhenConfident": 160,
"wordFinalizationMaxWaitTime": 160,
"maxTurnSilence": 400,
"realtimeUrl": "<string>",
"wordBoost": [
"<string>"
],
"endUtteranceSilenceThreshold": 123,
"disablePartialTranscripts": true,
"fallbackPlan": {
"transcribers": [
{
"provider": "assembly-ai",
"language": "en",
"confidenceThreshold": 0.4,
"enableUniversalStreamingApi": false,
"formatTurns": false,
"endOfTurnConfidenceThreshold": 0.7,
"minEndOfTurnSilenceWhenConfident": 160,
"wordFinalizationMaxWaitTime": 160,
"maxTurnSilence": 400,
"realtimeUrl": "<string>",
"wordBoost": [
"<string>"
],
"endUtteranceSilenceThreshold": 123,
"disablePartialTranscripts": true
}
]
}
},
"voice": {
"provider": "azure",
"voiceId": "andrew",
"cachingEnabled": true,
"chunkPlan": {
"enabled": true,
"minCharacters": 30,
"punctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"formatPlan": {
"enabled": true,
"numberToDigitsCutoff": 2025,
"replacements": [
{
"type": "exact",
"key": "<string>",
"value": "<string>",
"replaceAllEnabled": false
}
],
"formattersEnabled": "markdown"
}
},
"speed": 1.25,
"fallbackPlan": {
"voices": [
{
"provider": "azure",
"voiceId": "andrew",
"cachingEnabled": true,
"speed": 1.25,
"chunkPlan": {
"enabled": true,
"minCharacters": 30,
"punctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"formatPlan": {
"enabled": true,
"numberToDigitsCutoff": 2025,
"replacements": [
{
"type": "exact",
"key": "<string>",
"value": "<string>",
"replaceAllEnabled": false
}
],
"formattersEnabled": "markdown"
}
}
}
]
}
},
"prompt": "<string>",
"globalNodePlan": {
"enabled": false,
"enterCondition": ""
},
"variableExtractionPlan": {
"schema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"pattern": "<string>",
"format": "date-time",
"required": [
"<string>"
],
"enum": [
"<string>"
],
"title": "<string>"
},
"aliases": [
{
"key": "<string>",
"value": "<string>"
}
]
},
"isStart": true,
"metadata": {}
}
],
"id": "<string>",
"orgId": "<string>",
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z",
"name": "<string>",
"edges": [
{
"from": "<string>",
"to": "<string>",
"condition": {
"type": "ai",
"prompt": "<string>"
},
"metadata": {}
}
],
"transcriber": {
"provider": "assembly-ai",
"language": "en",
"confidenceThreshold": 0.4,
"enableUniversalStreamingApi": false,
"formatTurns": false,
"endOfTurnConfidenceThreshold": 0.7,
"minEndOfTurnSilenceWhenConfident": 160,
"wordFinalizationMaxWaitTime": 160,
"maxTurnSilence": 400,
"realtimeUrl": "<string>",
"wordBoost": [
"<string>"
],
"endUtteranceSilenceThreshold": 123,
"disablePartialTranscripts": true,
"fallbackPlan": {
"transcribers": [
{
"provider": "assembly-ai",
"language": "en",
"confidenceThreshold": 0.4,
"enableUniversalStreamingApi": false,
"formatTurns": false,
"endOfTurnConfidenceThreshold": 0.7,
"minEndOfTurnSilenceWhenConfident": 160,
"wordFinalizationMaxWaitTime": 160,
"maxTurnSilence": 400,
"realtimeUrl": "<string>",
"wordBoost": [
"<string>"
],
"endUtteranceSilenceThreshold": 123,
"disablePartialTranscripts": true
}
]
}
},
"voice": {
"provider": "azure",
"voiceId": "andrew",
"cachingEnabled": true,
"chunkPlan": {
"enabled": true,
"minCharacters": 30,
"punctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"formatPlan": {
"enabled": true,
"numberToDigitsCutoff": 2025,
"replacements": [
{
"type": "exact",
"key": "<string>",
"value": "<string>",
"replaceAllEnabled": false
}
],
"formattersEnabled": "markdown"
}
},
"speed": 1.25,
"fallbackPlan": {
"voices": [
{
"provider": "azure",
"voiceId": "andrew",
"cachingEnabled": true,
"speed": 1.25,
"chunkPlan": {
"enabled": true,
"minCharacters": 30,
"punctuationBoundaries": [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
],
"formatPlan": {
"enabled": true,
"numberToDigitsCutoff": 2025,
"replacements": [
{
"type": "exact",
"key": "<string>",
"value": "<string>",
"replaceAllEnabled": false
}
],
"formattersEnabled": "markdown"
}
}
}
]
}
},
"observabilityPlan": {
"provider": "langfuse",
"tags": [
"<string>"
],
"metadata": {}
},
"backgroundSound": "office",
"credentials": [
{
"provider": "anthropic",
"apiKey": "<string>",
"name": "<string>"
}
],
"globalPrompt": "<string>",
"server": {
"timeoutSeconds": 20,
"url": "<string>",
"headers": {},
"backoffPlan": {
"type": "fixed",
"maxRetries": 0,
"baseDelaySeconds": 1
}
},
"compliancePlan": {
"hipaaEnabled": {
"hipaaEnabled": false
},
"pciEnabled": {
"pciEnabled": false
}
},
"analysisPlan": {
"minMessagesThreshold": 1,
"summaryPlan": {
"messages": [
{}
],
"enabled": true,
"timeoutSeconds": 30.5
},
"structuredDataPlan": {
"messages": [
{}
],
"enabled": true,
"schema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"pattern": "<string>",
"format": "date-time",
"required": [
"<string>"
],
"enum": [
"<string>"
],
"title": "<string>"
},
"timeoutSeconds": 30.5
},
"structuredDataMultiPlan": [
{
"key": "<string>",
"plan": {
"messages": [
{}
],
"enabled": true,
"schema": {
"type": "string",
"items": {},
"properties": {},
"description": "<string>",
"pattern": "<string>",
"format": "date-time",
"required": [
"<string>"
],
"enum": [
"<string>"
],
"title": "<string>"
},
"timeoutSeconds": 30.5
}
}
],
"successEvaluationPlan": {
"rubric": "NumericScale",
"messages": [
{}
],
"enabled": true,
"timeoutSeconds": 30.5
}
},
"artifactPlan": {
"recordingEnabled": true,
"recordingFormat": "wav;l16",
"videoRecordingEnabled": false,
"pcapEnabled": true,
"pcapS3PathPrefix": "/pcaps",
"transcriptPlan": {
"enabled": true,
"assistantName": "<string>",
"userName": "<string>"
},
"recordingPath": "<string>"
},
"startSpeakingPlan": {
"waitSeconds": 0.4,
"smartEndpointingEnabled": false,
"smartEndpointingPlan": {
"provider": "cozmox"
},
"customEndpointingRules": [
{
"type": "assistant",
"regex": "<string>",
"timeoutSeconds": 7.5,
"regexOptions": [
{
"type": "ignore-case",
"enabled": true
}
]
}
],
"transcriptionEndpointingPlan": {
"onPunctuationSeconds": 0.1,
"onNoPunctuationSeconds": 1.5,
"onNumberSeconds": 0.5
}
},
"stopSpeakingPlan": {
"numWords": 0,
"voiceSeconds": 0.2,
"backoffSeconds": 1,
"acknowledgementPhrases": [
"i understand",
"i see",
"i got it",
"i hear you",
"im listening",
"im with you",
"right",
"okay",
"ok",
"sure",
"alright",
"got it",
"understood",
"yeah",
"yes",
"uh-huh",
"mm-hmm",
"gotcha",
"mhmm",
"ah",
"yeah okay",
"yeah sure"
],
"interruptionPhrases": [
"stop",
"shut",
"up",
"enough",
"quiet",
"silence",
"but",
"dont",
"not",
"no",
"hold",
"wait",
"cut",
"pause",
"nope",
"nah",
"nevermind",
"never",
"bad",
"actually"
]
},
"monitorPlan": {
"listenEnabled": false,
"listenAuthenticationEnabled": false,
"controlEnabled": false,
"controlAuthenticationEnabled": false
},
"backgroundSpeechDenoisingPlan": {
"smartDenoisingPlan": {
"enabled": false
},
"fourierDenoisingPlan": {
"enabled": false,
"mediaDetectionEnabled": true,
"staticThreshold": -35,
"baselineOffsetDb": -15,
"windowSizeMs": 3000,
"baselinePercentile": 85
}
},
"credentialIds": [
"<string>"
]
}
]
This is the Conversation node. This can be used to start a conversation with the customer.
The flow is driven by the node's prompt and the global context.
Available option: conversation. Maximum length: 80.
This is the model for the node.
This overrides workflow.model.
This is the provider of the model. Available option: openai.
This is the OpenAI model that will be used.
When using cozmox OpenAI or your own Azure Credentials, you have the option to specify the region for the selected model. This shouldn't be specified unless you have a specific reason to do so; cozmox will automatically find the fastest region that makes sense. This is helpful when you are required to comply with Data Residency rules. Learn more about Azure regions here: https://azure.microsoft.com/en-us/explore/global-infrastructure/data-residency/.
gpt-4.1-2025-04-14, gpt-4.1-mini-2025-04-14, gpt-4.1-nano-2025-04-14, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4.5-preview, chatgpt-4o-latest, o3, o3-mini, o4-mini, o1-preview, o1-preview-2024-09-12, o1-mini, o1-mini-2024-09-12, gpt-4o-mini-2024-07-18, gpt-4o-mini, gpt-4o, gpt-4o-2024-05-13, gpt-4o-2024-08-06, gpt-4o-2024-11-20, gpt-4-turbo, gpt-4-turbo-2024-04-09, gpt-4-turbo-preview, gpt-4-0125-preview, gpt-4-1106-preview, gpt-4, gpt-4-0613, gpt-3.5-turbo, gpt-3.5-turbo-0125, gpt-3.5-turbo-1106, gpt-3.5-turbo-16k, gpt-3.5-turbo-0613, gpt-4.1-2025-04-14:westus, gpt-4.1-2025-04-14:eastus2, gpt-4.1-2025-04-14:eastus, gpt-4.1-2025-04-14:westus3, gpt-4.1-2025-04-14:northcentralus, gpt-4.1-2025-04-14:southcentralus, gpt-4.1-mini-2025-04-14:westus, gpt-4.1-mini-2025-04-14:eastus2, gpt-4.1-mini-2025-04-14:eastus, gpt-4.1-mini-2025-04-14:westus3, gpt-4.1-mini-2025-04-14:northcentralus, gpt-4.1-mini-2025-04-14:southcentralus, gpt-4.1-nano-2025-04-14:westus, gpt-4.1-nano-2025-04-14:eastus2, gpt-4.1-nano-2025-04-14:westus3, gpt-4.1-nano-2025-04-14:northcentralus, gpt-4.1-nano-2025-04-14:southcentralus, gpt-4o-2024-11-20:swedencentral, gpt-4o-2024-11-20:westus, gpt-4o-2024-11-20:eastus2, gpt-4o-2024-11-20:eastus, gpt-4o-2024-11-20:westus3, gpt-4o-2024-11-20:southcentralus, gpt-4o-2024-08-06:westus, gpt-4o-2024-08-06:westus3, gpt-4o-2024-08-06:eastus, gpt-4o-2024-08-06:eastus2, gpt-4o-2024-08-06:northcentralus, gpt-4o-2024-08-06:southcentralus, gpt-4o-mini-2024-07-18:westus, gpt-4o-mini-2024-07-18:westus3, gpt-4o-mini-2024-07-18:eastus, gpt-4o-mini-2024-07-18:eastus2, gpt-4o-mini-2024-07-18:northcentralus, gpt-4o-mini-2024-07-18:southcentralus, gpt-4o-2024-05-13:eastus2, gpt-4o-2024-05-13:eastus, gpt-4o-2024-05-13:northcentralus, gpt-4o-2024-05-13:southcentralus, gpt-4o-2024-05-13:westus3, gpt-4o-2024-05-13:westus, gpt-4-turbo-2024-04-09:eastus2, gpt-4-0125-preview:eastus, gpt-4-0125-preview:northcentralus, gpt-4-0125-preview:southcentralus, gpt-4-1106-preview:australia, gpt-4-1106-preview:canadaeast, gpt-4-1106-preview:france, gpt-4-1106-preview:india, gpt-4-1106-preview:norway, gpt-4-1106-preview:swedencentral, gpt-4-1106-preview:uk, gpt-4-1106-preview:westus, gpt-4-1106-preview:westus3, gpt-4-0613:canadaeast, gpt-3.5-turbo-0125:canadaeast, gpt-3.5-turbo-0125:northcentralus, gpt-3.5-turbo-0125:southcentralus, gpt-3.5-turbo-1106:canadaeast, gpt-3.5-turbo-1106:westus 100This is the temperature of the model.
Range: 0 <= x <= 2.
This is the max tokens of the model. Range: 50 <= x <= 10000.
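For illustration, a node-level model override using only fields from the response above might look like this (the values are examples within the documented ranges):
{
  "provider": "openai",
  "model": "gpt-4o-mini",
  "temperature": 0.7,
  "maxTokens": 250
}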
This is the transcriber for the node.
This overrides workflow.transcriber.
This is the transcription provider that will be used. Available option: assembly-ai.
This is the language that will be set for the transcription. Example: en.
Transcripts below this confidence threshold will be discarded.
@default 0.4. Range: 0 <= x <= 1.
Uses Assembly AI's new Universal Streaming API. See: https://www.assemblyai.com/docs/speech-to-text/universal-streaming
@default false
This enables formatting of transcripts. Only used when enableUniversalStreamingApi is true.
@default false
The confidence threshold to use when determining if the end of a turn has been reached. Only used when enableUniversalStreamingApi is true.
@default 0.7. Range: 0 <= x <= 1.
The minimum amount of silence in milliseconds required to detect end of turn when confident. Only used when enableUniversalStreamingApi is true.
@default 160. Range: x >= 0.
The maximum wait time for word finalization. Only used when enableUniversalStreamingApi is true.
@default 160. Range: x >= 0.
The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered. Only used when enableUniversalStreamingApi is true.
@default 400. Range: x >= 0.
The WebSocket URL that the transcriber connects to.
Add up to 2500 characters of custom vocabulary. Maximum length: 2500.
The duration of the end utterance silence threshold in milliseconds.
Disable partial transcripts. Set to true to not receive partial transcripts. Defaults to false.
This is the plan for transcriber fallbacks in the event that the primary transcriber fails.
This is the transcription provider that will be used.
assembly-ai This is the language that will be set for the transcription.
en Transcripts below this confidence threshold will be discarded.
@default 0.4
0 <= x <= 10.4
Uses Assembly AI's new Universal Streaming API. See: https://www.assemblyai.com/docs/speech-to-text/universal-streaming
@default false
false
This enables formatting of transcripts. Only used when enableUniversalStreamingApi is true.
@default false
false
The confidence threshold to use when determining if the end of a turn has been reached. Only used when enableUniversalStreamingApi is true.
@default 0.7
0 <= x <= 10.7
The minimum amount of silence in milliseconds required to detect end of turn when confident. Only used when enableUniversalStreamingApi is true.
@default 160
x >= 0160
The maximum wait time for word finalization. Only used when enableUniversalStreamingApi is true.
@default 160
x >= 0160
The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered. Only used when enableUniversalStreamingApi is true.
@default 400
x >= 0400
The WebSocket URL that the transcriber connects to.
Add up to 2500 characters of custom vocabulary.
2500The duration of the end utterance silence threshold in milliseconds.
Disable partial transcripts.
Set to true to not receive partial transcripts. Defaults to false.
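As a sketch, a transcriber with a single fallback might be configured as follows (an illustrative fragment; the wordBoost entries are placeholder vocabulary):
{
  "provider": "assembly-ai",
  "language": "en",
  "confidenceThreshold": 0.4,
  "wordBoost": ["cozmox", "workflow"],
  "fallbackPlan": {
    "transcribers": [
      { "provider": "assembly-ai", "language": "en" }
    ]
  }
}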
This is the voice for the node.
This overrides workflow.voice.
This is the voice provider that will be used. Available option: azure.
This is the provider-specific ID that will be used. Available options: andrew, brian, emma.
This is the flag to toggle voice caching for the assistant. Example: true.
This is the plan for chunking the model output before it is sent to the voice provider.
This determines whether the model output is chunked before being sent to the voice provider. Default true.
Usage:
Set this to false to disable chunking, or true to enable it. If disabled, cozmox-provided audio control tokens will not work.
@default true. Example: true.
This is the minimum number of characters in a chunk.
@default 30. Range: 1 <= x <= 80.
These are the punctuations that are considered valid boundaries for a chunk to be created.
Usage:
The default is automatically set to balance the trade-off between quality and latency based on the provider.
Available options: 。, ,, ., !, ?, ;, ), ،, ۔, ।, ॥, |, ||, ,, :
Example:
[
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
]
This is the plan for formatting the chunk before it is sent to the voice provider.
This determines whether the chunk is formatted before being sent to the voice provider. This helps with enunciation. This includes phone numbers, emails and addresses. Default true.
Usage:
To disable formatting, set this to false. If voice.chunkPlan.enabled is false, this is automatically false since there's no chunk to format.
@default true. Example: true.
This is the cutoff after which a number is converted to individual digits instead of being spoken as words.
@default 2025. Range: x >= 0.
These are the custom replacements you can make to the chunk before it is sent to the voice provider.
Usage:
For exact replacements, use the ExactReplacement type. Eg. { type: 'exact', key: 'hello', value: 'hi' }
For regex replacements, use the RegexReplacement type. Eg. { type: 'regex', regex: '\\b[a-zA-Z]{5}\\b', value: 'hi' }
@default []
This is the exact replacement type. You can use this to replace a specific word or phrase with a different word or phrase.
Allowed value: exact.
This is the key to replace.
This is the value that will replace the match. Maximum length: 1000.
This option lets you control whether to replace all instances of the key or only the first one. By default, it only replaces the first instance.
List of formatters to apply. If not provided, all default formatters will be applied. If provided, only the specified formatters will be applied. Note: Some essential formatters like angle bracket removal will always be applied. @default undefined
Available options: markdown, asterisk, quote, dash, newline, colon, acronym, dollarAmount, email, date, time, distance, unit, percentage, phoneNumber, number, stripAsterisk.
This is the speed multiplier that will be used.
Range: 0.5 <= x <= 2.
This is the plan for voice provider fallbacks in the event that the primary voice provider fails.
This is the list of voices to fallback to in the event that the primary voice provider fails.
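For example, a voice with chunking, one formatting replacement, and a fallback voice could look like this (illustrative values taken from the options shown above):
{
  "provider": "azure",
  "voiceId": "andrew",
  "speed": 1.0,
  "chunkPlan": {
    "enabled": true,
    "minCharacters": 30,
    "formatPlan": {
      "enabled": true,
      "replacements": [
        { "type": "exact", "key": "hello", "value": "hi" }
      ]
    }
  },
  "fallbackPlan": {
    "voices": [
      { "provider": "azure", "voiceId": "brian" }
    ]
  }
}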
This is the voice provider that will be used.
azure This is the provider-specific ID that will be used.
andrew, brian, emma This is the flag to toggle voice caching for the assistant.
true
This is the speed multiplier that will be used.
0.5 <= x <= 2This is the plan for chunking the model output before it is sent to the voice provider.
This determines whether the model output is chunked before being sent to the voice provider. Default true.
Usage:
false.true.If disabled, cozmox-provided audio control tokens like
@default true
true
This is the minimum number of characters in a chunk.
Usage:
@default 30
1 <= x <= 8030
These are the punctuations that are considered valid boundaries for a chunk to be created.
Usage:
Default is automatically set to balance the trade-off between quality and latency based on the provider.
。, ,, ., !, ?, ;, ), ،, ۔, ।, ॥, |, ||, ,, : [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
]This is the plan for formatting the chunk before it is sent to the voice provider.
This determines whether the chunk is formatted before being sent to the voice provider. This helps with enunciation. This includes phone numbers, emails and addresses. Default true.
Usage:
false.If voice.chunkPlan.enabled is false, this is automatically false since there's no chunk to format.
@default true
true
This is the cutoff after which a number is converted to individual digits instead of being spoken as words.
Example:
Usage:
@default 2025
x >= 02025
These are the custom replacements you can make to the chunk before it is sent to the voice provider.
Usage:
ExactReplacement type. Eg. { type: 'exact', key: 'hello', value: 'hi' }RegexReplacement type. Eg. { type: 'regex', regex: '\\b[a-zA-Z]{5}\\b', value: 'hi' }@default []
This is the exact replacement type. You can use this to replace a specific word or phrase with a different word or phrase.
Usage:
exact This is the key to replace.
This is the value that will replace the match.
1000This option let's you control whether to replace all instances of the key or only the first one. By default, it only replaces the first instance. Examples:
List of formatters to apply. If not provided, all default formatters will be applied. If provided, only the specified formatters will be applied. Note: Some essential formatters like angle bracket removal will always be applied. @default undefined
Available options: markdown, asterisk, quote, dash, newline, colon, acronym, dollarAmount, email, date, time, distance, unit, percentage, phoneNumber, number, stripAsterisk.
Maximum length: 5000.
This is the plan for the global node.
This is the flag to determine if this node is a global node.
@default false
This is the condition that will be checked to determine if the global node should be executed.
@default ''. Maximum length: 1000.
This is the plan that controls the variable extraction from the user's responses.
Usage:
Use schema to specify what you want to extract from the user's responses.
{
"schema": {
"type": "object",
"properties": {
"user": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
}
}
}
}
}
This will be extracted as {{ user.name }} and {{ user.age }} respectively.
(Optional) Use aliases to create new variables.
{
"aliases": [
{
"key": "userAge",
"value": "{{user.age}}"
},
{
"key": "userName",
"value": "{{user.name}}"
}
]
}
This will be extracted as {{ userAge }} and {{ userName }} respectively.
Note: The schema field is required for Conversation nodes if you want to extract variables from the user's responses. aliases is just a convenience.
This is the schema to extract.
Examples:
{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
}
}
}
These will be extracted as {{ name }} and {{ age }} respectively. To emphasize, object properties are extracted as direct global variables.
{
"type": "object",
"properties": {
"name": {
"type": "object",
"properties": {
"first": {
"type": "string"
},
"last": {
"type": "string"
}
}
}
}
}
These will be extracted as {{ name }}. And, {{ name.first }} and {{ name.last }} will be accessible.
{
"type": "array",
"title": "zipCodes",
"items": {
"type": "string"
}
}
This will be extracted as {{ zipCodes }}. To access the array items, you can use {{ zipCodes[0] }} and {{ zipCodes[1] }}.
{
"type": "array",
"name": "people",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"age": {
"type": "number"
},
"zipCodes": {
"type": "array",
"items": {
"type": "string"
}
}
}
}
}
This will be extracted as {{ people }}. To access the array items, you can use {{ people[n].name }}, {{ people[n].age }}, {{ people[n].zipCodes }}, {{ people[n].zipCodes[0] }} and {{ people[n].zipCodes[1] }}.
This is the type of output you'd like.
string, number, integer, boolean are the primitive types and should be obvious.
array and object are more interesting and quite powerful. They allow you to define nested structures.
For array, you can define the schema of the items in the array using the items property.
For object, you can define the properties of the object using the properties property.
Available options: string, number, integer, boolean, array, object.
This is required if the type is "array". This is the schema of the items in the array.
This is of type JsonSchema. However, Swagger doesn't support circular references.
This is required if the type is "object". This specifies the properties of the object.
This is a map of string to JsonSchema. However, Swagger doesn't support circular references.
This is the description to help the model understand what it needs to output.
This is the pattern of the string. This is a regex that will be used to validate the data in question. To use a common format, use the format property instead.
OpenAI documentation: https://platform.openai.com/docs/guides/structured-outputs#supported-properties
This is the format of the string. To pass a regex, use the pattern property instead.
OpenAI documentation: https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat&type-restrictions=string-restrictions
Available options: date-time, time, date, duration, email, hostname, ipv4, ipv6, uuid.
This is a list of properties that are required.
This only makes sense if the type is "object".
This array specifies the allowed values that can be used to restrict the output of the model.
This is the title of the schema.
These are additional variables to create.
These will be accessible during the call as {{key}} and stored in call.artifact.variableValues after the call.
Example:
{
"aliases": [
{
"key": "customerName",
"value": "{{name}}"
},
{
"key": "fullName",
"value": "{{firstName}} {{lastName}}"
},
{
"key": "greeting",
"value": "Hello {{name}}, welcome to {{company}}!"
},
{
"key": "customerEmail",
"value": "{{addresses[0].city}}"
},
{
"key": "something",
"value": "{{any liquid}}"
}
]
}
This will create variables customerName, fullName, customerEmail, greeting, and something. To access these variables, you can reference them as {{customerName}}, {{fullName}}, {{customerEmail}}, {{greeting}}, and {{something}}.
This is the key of the variable.
This variable will be accessible during the call as {{key}} and stored in call.artifact.variableValues after the call.
Rules: the key must be between 1 and 40 characters.
This is the value of the variable.
This can reference existing variables, use filters, and perform transformations.
Examples: "{{name}}", "{{customer.email}}", "Hello {{name | upcase}}"
Maximum length: 10000.
This is whether or not the node is the start of the workflow.
This is for metadata you want to store on the task.
Maximum length: 80.
The from and to fields of an edge each have a maximum length of 80.
This is for metadata you want to store on the edge.
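For example, an AI-conditioned edge between two nodes might look like this (the node names and prompt are hypothetical):
{
  "from": "greeting",
  "to": "collect_details",
  "condition": {
    "type": "ai",
    "prompt": "The customer agreed to continue."
  }
}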
This is the transcriber for the workflow.
This can be overridden at node level using nodes[n].transcriber.
This is the transcription provider that will be used.
assembly-ai This is the language that will be set for the transcription.
en Transcripts below this confidence threshold will be discarded.
@default 0.4
0 <= x <= 10.4
Uses Assembly AI's new Universal Streaming API. See: https://www.assemblyai.com/docs/speech-to-text/universal-streaming
@default false
false
This enables formatting of transcripts. Only used when enableUniversalStreamingApi is true.
@default false
false
The confidence threshold to use when determining if the end of a turn has been reached. Only used when enableUniversalStreamingApi is true.
@default 0.7
0 <= x <= 10.7
The minimum amount of silence in milliseconds required to detect end of turn when confident. Only used when enableUniversalStreamingApi is true.
@default 160
x >= 0160
The maximum wait time for word finalization. Only used when enableUniversalStreamingApi is true.
@default 160
x >= 0160
The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered. Only used when enableUniversalStreamingApi is true.
@default 400
x >= 0400
The WebSocket URL that the transcriber connects to.
Add up to 2500 characters of custom vocabulary.
2500The duration of the end utterance silence threshold in milliseconds.
Disable partial transcripts.
Set to true to not receive partial transcripts. Defaults to false.
This is the plan for voice provider fallbacks in the event that the primary voice provider fails.
This is the transcription provider that will be used.
assembly-ai This is the language that will be set for the transcription.
en Transcripts below this confidence threshold will be discarded.
@default 0.4
0 <= x <= 10.4
Uses Assembly AI's new Universal Streaming API. See: https://www.assemblyai.com/docs/speech-to-text/universal-streaming
@default false
false
This enables formatting of transcripts. Only used when enableUniversalStreamingApi is true.
@default false
false
The confidence threshold to use when determining if the end of a turn has been reached. Only used when enableUniversalStreamingApi is true.
@default 0.7
0 <= x <= 10.7
The minimum amount of silence in milliseconds required to detect end of turn when confident. Only used when enableUniversalStreamingApi is true.
@default 160
x >= 0160
The maximum wait time for word finalization. Only used when enableUniversalStreamingApi is true.
@default 160
x >= 0160
The maximum amount of silence in milliseconds allowed in a turn before end of turn is triggered. Only used when enableUniversalStreamingApi is true.
@default 400
x >= 0400
The WebSocket URL that the transcriber connects to.
Add up to 2500 characters of custom vocabulary.
2500The duration of the end utterance silence threshold in milliseconds.
Disable partial transcripts.
Set to true to not receive partial transcripts. Defaults to false.
This is the voice for the workflow.
This can be overridden at node level using nodes[n].voice.
This is the voice provider that will be used.
azure This is the provider-specific ID that will be used.
andrew, brian, emma This is the flag to toggle voice caching for the assistant.
true
This is the plan for chunking the model output before it is sent to the voice provider.
This determines whether the model output is chunked before being sent to the voice provider. Default true.
Usage:
false.true.If disabled, cozmox-provided audio control tokens like
@default true
true
This is the minimum number of characters in a chunk.
Usage:
@default 30
1 <= x <= 8030
These are the punctuations that are considered valid boundaries for a chunk to be created.
Usage:
Default is automatically set to balance the trade-off between quality and latency based on the provider.
。, ,, ., !, ?, ;, ), ،, ۔, ।, ॥, |, ||, ,, : [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
]This is the plan for formatting the chunk before it is sent to the voice provider.
This determines whether the chunk is formatted before being sent to the voice provider. This helps with enunciation. This includes phone numbers, emails and addresses. Default true.
Usage:
false.If voice.chunkPlan.enabled is false, this is automatically false since there's no chunk to format.
@default true
true
This is the cutoff after which a number is converted to individual digits instead of being spoken as words.
Example:
Usage:
@default 2025
x >= 02025
These are the custom replacements you can make to the chunk before it is sent to the voice provider.
Usage:
ExactReplacement type. Eg. { type: 'exact', key: 'hello', value: 'hi' }RegexReplacement type. Eg. { type: 'regex', regex: '\\b[a-zA-Z]{5}\\b', value: 'hi' }@default []
This is the exact replacement type. You can use this to replace a specific word or phrase with a different word or phrase.
Usage:
exact This is the key to replace.
This is the value that will replace the match.
1000This option let's you control whether to replace all instances of the key or only the first one. By default, it only replaces the first instance. Examples:
List of formatters to apply. If not provided, all default formatters will be applied. If provided, only the specified formatters will be applied. Note: Some essential formatters like angle bracket removal will always be applied. @default undefined
markdown, asterisk, quote, dash, newline, colon, acronym, dollarAmount, email, date, time, distance, unit, percentage, phoneNumber, number, stripAsterisk This is the speed multiplier that will be used.
0.5 <= x <= 2This is the plan for voice provider fallbacks in the event that the primary voice provider fails.
This is the list of voices to fallback to in the event that the primary voice provider fails.
This is the voice provider that will be used.
azure This is the provider-specific ID that will be used.
andrew, brian, emma This is the flag to toggle voice caching for the assistant.
true
This is the speed multiplier that will be used.
0.5 <= x <= 2This is the plan for chunking the model output before it is sent to the voice provider.
This determines whether the model output is chunked before being sent to the voice provider. Default true.
Usage:
false.true.If disabled, cozmox-provided audio control tokens like
@default true
true
This is the minimum number of characters in a chunk.
Usage:
@default 30
1 <= x <= 8030
These are the punctuations that are considered valid boundaries for a chunk to be created.
Usage:
Default is automatically set to balance the trade-off between quality and latency based on the provider.
。, ,, ., !, ?, ;, ), ،, ۔, ।, ॥, |, ||, ,, : [
"。",
",",
".",
"!",
"?",
";",
"،",
"۔",
"।",
"॥",
"|",
"||",
",",
":"
]This is the plan for formatting the chunk before it is sent to the voice provider.
This determines whether the chunk is formatted before being sent to the voice provider. This helps with enunciation. This includes phone numbers, emails and addresses. Default true.
Usage:
false.If voice.chunkPlan.enabled is false, this is automatically false since there's no chunk to format.
@default true
true
This is the cutoff after which a number is converted to individual digits instead of being spoken as words.
Example:
Usage:
@default 2025
x >= 02025
These are the custom replacements you can make to the chunk before it is sent to the voice provider.
Usage:
ExactReplacement type. Eg. { type: 'exact', key: 'hello', value: 'hi' }RegexReplacement type. Eg. { type: 'regex', regex: '\\b[a-zA-Z]{5}\\b', value: 'hi' }@default []
This is the exact replacement type. You can use this to replace a specific word or phrase with a different word or phrase.
Usage:
exact This is the key to replace.
This is the value that will replace the match.
1000This option let's you control whether to replace all instances of the key or only the first one. By default, it only replaces the first instance. Examples:
List of formatters to apply. If not provided, all default formatters will be applied. If provided, only the specified formatters will be applied. Note: Some essential formatters like angle bracket removal will always be applied. @default undefined
Available options: markdown, asterisk, quote, dash, newline, colon, acronym, dollarAmount, email, date, time, distance, unit, percentage, phoneNumber, number, stripAsterisk.
This is the plan for observability of the workflow's calls.
Currently, only Langfuse is supported.
Available option: langfuse.
This is an array of tags to be added to the Langfuse trace. Tags allow you to categorize and filter traces. https://langfuse.com/docs/tracing-features/tags
This is a JSON object that will be added to the Langfuse trace. Traces can be enriched with metadata to better understand your users, application, and experiments. https://langfuse.com/docs/tracing-features/metadata By default it includes the call metadata, assistant metadata, and assistant overrides.
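For instance (the tag and metadata values are made up for illustration):
{
  "provider": "langfuse",
  "tags": ["production"],
  "metadata": { "experiment": "prompt-v2" }
}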
This is the background sound in the call. Default for phone calls is 'office' and default for web calls is 'off'. You can also provide a custom sound by providing a URL to an audio file.
Available options: off, office. Example: "office".
These are dynamic credentials that will be used for the workflow calls. By default, all the credentials are available for use in the call, but you can supplement additional credentials using this. Dynamic credentials override existing credentials.
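For example, supplementing an Anthropic credential might look like this (the key and name are placeholders):
[
  {
    "provider": "anthropic",
    "apiKey": "<your-anthropic-api-key>",
    "name": "my-anthropic-credential"
  }
]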
This is the global prompt for the workflow. Maximum length: 5000.
This is where cozmox will send webhooks. You can find all webhooks available along with their shape in the ServerMessage schema.
The order of precedence is:
This is the timeout in seconds for the request. Defaults to 20 seconds.
@default 20
Range: 1 <= x <= 300. Example: 20.
This is where the request will be sent.
These are the headers to include in the request.
Each key-value pair represents a header name and its value.
This is the backoff plan if the request fails. Defaults to undefined (the request will not be retried).
@default undefined (the request will not be retried)
This is the type of backoff plan to use. Defaults to fixed.
@default fixed
"fixed"
This is the maximum number of retries to attempt if the request fails. Defaults to 0 (no retries).
@default 0
Range: 0 <= x <= 10. Example: 0.
This is the base delay in seconds. For linear backoff, this is the delay between each retry. For exponential backoff, this is the initial delay.
Range: 0 <= x <= 10. Example: 1.
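Putting these together, a server configuration with a fixed backoff might look like this (the URL and header are placeholders):
{
  "url": "https://example.com/cozmox/webhook",
  "timeoutSeconds": 20,
  "headers": { "x-api-key": "<secret>" },
  "backoffPlan": {
    "type": "fixed",
    "maxRetries": 3,
    "baseDelaySeconds": 1
  }
}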
This is the compliance plan for the workflow. It allows you to configure HIPAA and other compliance settings.
When this is enabled, no logs, recordings, or transcriptions will be stored. At the end of the call, you will still receive an end-of-call-report message to store on your server. Defaults to false.
{ "hipaaEnabled": false }When this is enabled, the user will be restricted to use PCI-compliant providers, and no logs or transcripts are stored. At the end of the call, you will receive an end-of-call-report message to store on your server. Defaults to false.
{ "pciEnabled": false }This is the plan for analysis of workflow's calls. Stored in call.analysis.
This is the plan for analysis of the workflow's calls. Stored in call.analysis.
The minimum number of messages required to run the analysis plan. If the number of messages is less than this, analysis will be skipped. @default 2
Range: x >= 0.
This is the plan for generating the summary of the call. This outputs to call.analysis.summary.
These are the messages used to generate the summary.
@default: [ { "role": "system", "content": "You are an expert note-taker. You will be given a transcript of a call. Summarize the call in 2-3 sentences. DO NOT return anything except the summary." }, { "role": "user", "content": "Here is the transcript:\n\n{{transcript}}\n\n. Here is the ended reason of the call:\n\n{{endedReason}}\n\n" } ]
You can customize by providing any messages you want.
Here are the template variables available:
- {{transcript}}: the transcript of the call from call.artifact.transcript
- {{systemPrompt}}: the system prompt of the call from assistant.model.messages[type=system].content
- {{endedReason}}: the ended reason of the call from call.endedReason
This determines whether a summary is generated and stored in call.analysis.summary. Defaults to true.
Usage:
@default true
This is how long the request is tried before giving up. When request times out, call.analysis.summary will be empty.
Usage:
@default 5 seconds
Range: 1 <= x <= 60.
This is the plan for generating the structured data from the call. This outputs to call.analysis.structuredData.
These are the messages used to generate the structured data.
@default: [ { "role": "system", "content": "You are an expert data extractor. You will be given a transcript of a call. Extract structured data per the JSON Schema. DO NOT return anything except the structured data.\n\nJson Schema:\\n{{schema}}\n\nOnly respond with the JSON." }, { "role": "user", "content": "Here is the transcript:\n\n{{transcript}}\n\n. Here is the ended reason of the call:\n\n{{endedReason}}\n\n" } ]
You can customize by providing any messages you want.
Here are the template variables available:
- {{transcript}}: the transcript of the call from call.artifact.transcript
- {{systemPrompt}}: the system prompt of the call from assistant.model.messages[type=system].content
- {{schema}}: the schema of the structured data from structuredDataPlan.schema
- {{endedReason}}: the ended reason of the call from call.endedReason
This determines whether structured data is generated and stored in call.analysis.structuredData. Defaults to false.
Usage:
To generate structured data, set this to true and provide a schema.
@default false
This is the schema of the structured data. The output is stored in call.analysis.structuredData.
Complete guide on JSON Schema can be found here.
This is the type of output you'd like.
string, number, integer, boolean are the primitive types and should be obvious.
array and object are more interesting and quite powerful. They allow you to define nested structures.
For array, you can define the schema of the items in the array using the items property.
For object, you can define the properties of the object using the properties property.
Available options: string, number, integer, boolean, array, object.
This is required if the type is "array". This is the schema of the items in the array.
This is of type JsonSchema. However, Swagger doesn't support circular references.
This is required if the type is "object". This specifies the properties of the object.
This is a map of string to JsonSchema. However, Swagger doesn't support circular references.
This is the description to help the model understand what it needs to output.
This is the pattern of the string. This is a regex that will be used to validate the data in question. To use a common format, use the format property instead.
OpenAI documentation: https://platform.openai.com/docs/guides/structured-outputs#supported-properties
This is the format of the string. To pass a regex, use the pattern property instead.
OpenAI documentation: https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat&type-restrictions=string-restrictions
Available options: date-time, time, date, duration, email, hostname, ipv4, ipv6, uuid.
This is a list of properties that are required.
This only makes sense if the type is "object".
This array specifies the allowed values that can be used to restrict the output of the model.
This is the title of the schema.
This is how long the request is tried before giving up. When request times out, call.analysis.structuredData will be empty.
Usage:
@default 5 seconds
Range: 1 <= x <= 60.
This is an array of structured data plan catalogs. Each entry includes a key and a plan for generating the structured data from the call. This outputs to call.analysis.structuredDataMulti.
This is the key of the structured data plan in the catalog.
This is an individual structured data plan in the catalog.
These are the messages used to generate the structured data.
@default: [ { "role": "system", "content": "You are an expert data extractor. You will be given a transcript of a call. Extract structured data per the JSON Schema. DO NOT return anything except the structured data.\n\nJson Schema:\\n{{schema}}\n\nOnly respond with the JSON." }, { "role": "user", "content": "Here is the transcript:\n\n{{transcript}}\n\n. Here is the ended reason of the call:\n\n{{endedReason}}\n\n" } ]
You can customize by providing any messages you want.
Here are the template variables available:
call.artifact.transcript- {{systemPrompt}}: the system prompt of the call from assistant.model.messages[type=system].content- {{schema}}: the schema of the structured data from structuredDataPlan.schema- {{endedReason}}: the ended reason of the call from call.endedReasonThis determines whether structured data is generated and stored in call.analysis.structuredData. Defaults to false.
Usage:
schema.@default false
This is the schema of the structured data. The output is stored in call.analysis.structuredData.
Complete guide on JSON Schema can be found here.
This is the type of output you'd like.
string, number, integer, boolean are the primitive types and should be obvious.
array and object are more interesting and quite powerful. They allow you to define nested structures.
For array, you can define the schema of the items in the array using the items property.
For object, you can define the properties of the object using the properties property.
string, number, integer, boolean, array, object This is required if the type is "array". This is the schema of the items in the array.
This is of type JsonSchema. However, Swagger doesn't support circular references.
This is required if the type is "object". This specifies the properties of the object.
This is a map of string to JsonSchema. However, Swagger doesn't support circular references.
This is the description to help the model understand what it needs to output.
This is the pattern of the string. This is a regex that will be used to validate the data in question. To use a common format, use the format property instead.
OpenAI documentation: https://platform.openai.com/docs/guides/structured-outputs#supported-properties
This is the format of the string. To pass a regex, use the pattern property instead.
OpenAI documentation: https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat&type-restrictions=string-restrictions
date-time, time, date, duration, email, hostname, ipv4, ipv6, uuid This is a list of properties that are required.
This only makes sense if the type is "object".
This array specifies the allowed values that can be used to restrict the output of the model.
This is the title of the schema.
This is how long the request is tried before giving up. When request times out, call.analysis.structuredData will be empty.
Usage:
@default 5 seconds
Range: 1 <= x <= 60.
This is the plan for generating the success evaluation of the call. This outputs to call.analysis.successEvaluation.
This enforces the rubric of the evaluation. The output is stored in call.analysis.successEvaluation.
Options include: NumericScale, DescriptiveScale, Checklist, Matrix, PercentageScale, LikertScale, AutomaticRubric, PassFail.
Default is 'PassFail'.
These are the messages used to generate the success evaluation.
@default: [ { "role": "system", "content": "You are an expert call evaluator. You will be given a transcript of a call and the system prompt of the AI participant. Determine if the call was successful based on the objectives inferred from the system prompt. DO NOT return anything except the result.\n\nRubric:\\n{{rubric}}\n\nOnly respond with the result." }, { "role": "user", "content": "Here is the transcript:\n\n{{transcript}}\n\n" }, { "role": "user", "content": "Here was the system prompt of the call:\n\n{{systemPrompt}}\n\n. Here is the ended reason of the call:\n\n{{endedReason}}\n\n" } ]
You can customize by providing any messages you want.
Here are the template variables available:
- {{transcript}}: the transcript of the call from call.artifact.transcript
- {{systemPrompt}}: the system prompt of the call from assistant.model.messages[type=system].content
- {{rubric}}: the rubric of the success evaluation from successEvaluationPlan.rubric
- {{endedReason}}: the ended reason of the call from call.endedReason
This determines whether a success evaluation is generated and stored in call.analysis.successEvaluation. Defaults to true.
Usage:
@default true
This is how long the request is tried before giving up. When request times out, call.analysis.successEvaluation will be empty.
Usage:
@default 5 seconds
Range: 1 <= x <= 60.
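As a sketch, an analysis plan that produces a summary and simple structured data might look like this (the schema is an illustrative assumption):
{
  "minMessagesThreshold": 2,
  "summaryPlan": { "enabled": true, "timeoutSeconds": 10 },
  "structuredDataPlan": {
    "enabled": true,
    "timeoutSeconds": 10,
    "schema": {
      "type": "object",
      "properties": {
        "callbackRequested": { "type": "boolean" }
      }
    }
  }
}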
This is the plan for artifacts generated during the workflow's calls. Stored in call.artifact.
This determines whether assistant's calls are recorded. Defaults to true.
Usage:
To record calls when assistant.hipaaEnabled (deprecated) or assistant.compliancePlan.hipaaEnabled is true, explicitly set this to true and make sure to provide S3 or GCP credentials on the Provider Credentials page in the Dashboard.
You can find the recording at call.artifact.recordingUrl and call.artifact.stereoRecordingUrl after the call is ended.
@default true
true
This determines the format of the recording. Defaults to wav;l16.
@default 'wav;l16'
Available options: wav;l16, mp3.
This determines whether the video is recorded during the call. Defaults to false. Only relevant for webCall type.
You can find the video recording at call.artifact.videoRecordingUrl after the call is ended.
@default false
false
This determines whether the SIP packet capture is enabled. Defaults to true. Only relevant for phone type calls where phone number's provider is cozmox or byo-phone-number.
You can find the packet capture at call.artifact.pcapUrl after the call is ended.
@default true
true
This is the path where the SIP packet capture will be uploaded. This is only used if you have provided S3 or GCP credentials on the Provider Credentials page in the Dashboard.
If credential.s3PathPrefix or credential.bucketPlan.path is set, this will append to it.
Usage:
To upload the packet capture to a specific path, set this to that path, e.g. /my-assistant-captures. To upload it to the root of the bucket, set this to /.
@default '/'
"/pcaps"
This is the plan for call.artifact.transcript. To disable, set transcriptPlan.enabled to false.
This determines whether the transcript is stored in call.artifact.transcript. Defaults to true.
@default true
true
This is the name of the assistant in the transcript. Defaults to 'AI'.
Usage:
With assistantName set to 'Buyer':
User: Hello, how are you?
Buyer: I'm fine.
User: Do you want to buy a car?
Buyer: No.
@default 'AI'
This is the name of the user in the transcript. Defaults to 'User'.
Usage:
With userName set to 'Seller':
Seller: Hello, how are you?
AI: I'm fine.
Seller: Do you want to buy a car?
AI: No.
@default 'User'
This is the path where the recording will be uploaded. This is only used if you have provided S3 or GCP credentials on the Provider Credentials page in the Dashboard.
If credential.s3PathPrefix or credential.bucketPlan.path is set, this will append to it.
Usage:
To upload the recording to a specific path, set this to that path, e.g. /my-assistant-recordings. To upload it to the root of the bucket, set this to /.
@default '/'
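For instance, an artifact plan that records audio as mp3 and names the transcript speakers might look like this (illustrative values from the options above):
{
  "recordingEnabled": true,
  "recordingFormat": "mp3",
  "videoRecordingEnabled": false,
  "pcapEnabled": true,
  "pcapS3PathPrefix": "/pcaps",
  "transcriptPlan": { "enabled": true, "assistantName": "Buyer", "userName": "Seller" }
}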
This is the plan for when the workflow nodes should start talking.
You should configure this if you're running into these issues:
This is how long assistant waits before speaking. Defaults to 0.4.
This is the minimum it will wait, but if there is latency in the pipeline, this minimum will be exceeded. This is intended as a stopgap in case the pipeline is moving too fast.
@default 0.4. Range: 0 <= x <= 5.
false
This is the plan for smart endpointing. Pick between cozmox smart endpointing or LiveKit smart endpointing (or nothing). We strongly recommend using livekit endpointing when working in English. LiveKit endpointing is not supported in other languages, yet.
If this is set, it will override and take precedence over transcriptionEndpointingPlan.
This plan will still be overridden by any matching customEndpointingRules.
This is the provider for the smart endpointing plan.
Available options: cozmox, livekit, custom-endpointing-model. Example: "cozmox".
These are the custom endpointing rules to set an endpointing timeout based on a regex on the customer's speech or the assistant's last message.
Usage:
These rules have the highest precedence and will override both smartEndpointingPlan and transcriptionEndpointingPlan when a rule is matched.
The rules are evaluated in order and the first one that matches will be used.
Order of precedence for endpointing:
@default []
This endpointing rule is based on the last assistant message before customer started speaking.
Flow:
If the last assistant message matches the regex, the endpointing timeout is set to timeoutSeconds.
Allowed value: assistant.
This is the regex pattern to match.
Note:
This is matched using the RegExp.test method in Node.JS. Eg. /hello/.test("hello there") will return true.
Hot tip:
Remember to double-escape \ when sending the regex pattern. Eg. "hello\sthere" will be sent over the wire as "hellosthere". Send "hello\\sthere" instead.
RegExp.test does substring matching, so /cat/.test("I love cats") will return true. To do full string matching, send "^cat$".
This is the endpointing timeout in seconds, if the rule is matched.
Range: 0 <= x <= 15.
These are the options for the regex match. Defaults to all disabled.
@default []
This is the type of the regex option. Options are:
ignore-case: Ignores the case of the text being matched.
whole-word: Matches whole words only.
multi-line: Matches across multiple lines.
Available options: ignore-case, whole-word, multi-line.
This is whether to enable the option.
@default false
This determines how a customer speech is considered done (endpointing) using the transcription of customer's speech.
Once an endpoint is triggered, the request is sent to assistant.model.
Note: This plan is only used if smartEndpointingPlan is not set. If both are provided, smartEndpointingPlan takes precedence.
This plan will also be overridden by any matching customEndpointingRules.
The minimum number of seconds to wait after transcription ending with punctuation before sending a request to the model. Defaults to 0.1.
This setting exists because the transcriber punctuates the transcription when it's more confident that customer has completed a thought.
@default 0.1
Range: 0 <= x <= 3. Example: 0.1.
The minimum number of seconds to wait after transcription ending without punctuation before sending a request to the model. Defaults to 1.5.
This setting exists to catch the cases where the transcriber was not confident enough to punctuate the transcription, but the customer is done and has been silent for a long time.
@default 1.5
Range: 0 <= x <= 3. Example: 1.5.
The minimum number of seconds to wait after transcription ending with a number before sending a request to the model. Defaults to 0.4.
This setting exists because the transcriber will sometimes punctuate the transcription ending with a number, even though the customer hasn't uttered the full number. This happens commonly for long numbers when the customer reads the number in chunks.
@default 0.5
Range: 0 <= x <= 3. Example: 0.5.
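Putting these together, a start speaking plan with one custom endpointing rule might look like this (the regex is a made-up example):
{
  "waitSeconds": 0.4,
  "smartEndpointingPlan": { "provider": "livekit" },
  "customEndpointingRules": [
    {
      "type": "assistant",
      "regex": "(phone|account) number",
      "timeoutSeconds": 3,
      "regexOptions": [{ "type": "ignore-case", "enabled": true }]
    }
  ],
  "transcriptionEndpointingPlan": {
    "onPunctuationSeconds": 0.1,
    "onNoPunctuationSeconds": 1.5,
    "onNumberSeconds": 0.5
  }
}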
This is the plan for when workflow nodes should stop talking on customer interruption.
You should configure this if you're running into these issues:
This is the number of words that the customer has to say before the assistant will stop talking.
Words like "stop", "actually", "no", etc. will always interrupt immediately regardless of this value.
Words like "okay", "yeah", "right" will never interrupt.
When set to 0, voiceSeconds is used in addition to the transcriptions to determine the customer has started speaking.
Defaults to 0.
@default 0
Range: 0 <= x <= 10. Example: 0.
This is the seconds customer has to speak before the assistant stops talking. This uses the VAD (Voice Activity Detection) spike to determine if the customer has started speaking.
Considerations:
This is only used if numWords is set to 0.
Defaults to 0.2
@default 0.2
Range: 0 <= x <= 0.5. Example: 0.2.
This is the seconds to wait before the assistant will start talking again after being interrupted.
Defaults to 1.
@default 1
Range: 0 <= x <= 10. Example: 1.
These are the phrases that will never interrupt the assistant, even if numWords threshold is met. These are typically acknowledgement or backchanneling phrases.
Maximum length: 240.
Example:
[
"i understand",
"i see",
"i got it",
"i hear you",
"im listening",
"im with you",
"right",
"okay",
"ok",
"sure",
"alright",
"got it",
"understood",
"yeah",
"yes",
"uh-huh",
"mm-hmm",
"gotcha",
"mhmm",
"ah",
"yeah okay",
"yeah sure"
]
These are the phrases that will always interrupt the assistant immediately, regardless of numWords. These are typically phrases indicating disagreement or desire to stop.
Maximum length: 240.
Example:
[
"stop",
"shut",
"up",
"enough",
"quiet",
"silence",
"but",
"dont",
"not",
"no",
"hold",
"wait",
"cut",
"pause",
"nope",
"nah",
"nevermind",
"never",
"bad",
"actually"
]
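As an illustration, a stop speaking plan tuned to ignore short backchannels might look like this (an abbreviated sketch of the phrase lists):
{
  "numWords": 2,
  "voiceSeconds": 0.2,
  "backoffSeconds": 1,
  "acknowledgementPhrases": ["okay", "got it"],
  "interruptionPhrases": ["stop", "wait"]
}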
This is the plan for real-time monitoring of the workflow's calls.
Usage:
To enable live listening of the workflow's calls, set monitorPlan.listenEnabled to true.
To enable live control of the workflow's calls, set monitorPlan.controlEnabled to true.
This determines whether the assistant's calls allow live listening. Defaults to true.
Fetch call.monitor.listenUrl to get the live listening URL.
@default true
false
This enables authentication on the call.monitor.listenUrl.
If listenAuthenticationEnabled is true, the call.monitor.listenUrl will require an Authorization: Bearer <cozmox-public-api-key> header.
@default false
false
This determines whether the assistant's calls allow live control. Defaults to true.
Fetch call.monitor.controlUrl to get the live control URL.
To use, send any control message via a POST request to call.monitor.controlUrl. Here are the types of controls supported: https://docs.cozmox.ai/api-reference/messages/client-inbound-message
@default true
false
This enables authentication on the call.monitor.controlUrl.
If controlAuthenticationEnabled is true, the call.monitor.controlUrl will require an Authorization: Bearer <cozmox-public-api-key> header.
@default false
false
This enables filtering of noise and background speech while the user is talking.
Features:
Both can be used together. Order of precedence:
Whether Fourier denoising is enabled. Note that this is experimental and may not work as expected.
This can be combined with smart denoising, and will be run afterwards.
Whether Fourier denoising is enabled. Note that this is experimental and may not work as expected.
Whether automatic media detection is enabled. When enabled, the filter will automatically detect consistent background TV/music/radio and switch to more aggressive filtering settings. Only applies when enabled is true.
true
Static threshold in dB used as fallback when no baseline is established.
Range: -80 <= x <= 0. Example: -35.
How far below the rolling baseline to filter audio, in dB. Lower values (e.g., -10) are more aggressive, higher values (e.g., -20) are more conservative.
Range: -30 <= x <= -5. Example: -15.
Rolling window size in milliseconds for calculating the audio baseline. Larger windows adapt more slowly but are more stable.
Range: 1000 <= x <= 30000. Example: 3000.
Percentile to use for baseline calculation (1-99). Higher percentiles (e.g., 85) focus on louder speech, lower percentiles (e.g., 50) include quieter speech.
Range: 1 <= x <= 99. Example: 85.
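For example, enabling both denoising stages with the documented defaults might look like:
{
  "smartDenoisingPlan": { "enabled": true },
  "fourierDenoisingPlan": {
    "enabled": true,
    "mediaDetectionEnabled": true,
    "staticThreshold": -35,
    "baselineOffsetDb": -15,
    "windowSizeMs": 3000,
    "baselinePercentile": 85
  }
}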
These are the credentials that will be used for the workflow calls. By default, all the credentials are available for use in the call but you can provide a subset using this.