I’m using snowflake. This is my statement:
select
PARSE_JSON(src): "eventType"::STRING AS eventtype,
PARSE_JSON(PARSE_JSON(src): "data"::STRING): "events" AS events,
PARSE_JSON(PARSE_JSON(PARSE_JSON(src): "data"::STRING): "events"): "actor" AS actor,
PARSE_JSON(PARSE_JSON(PARSE_JSON(PARSE_JSON(src): "data"::STRING): "events"): "actor"): "actor" AS actor2,
PARSE_JSON(PARSE_JSON(PARSE_JSON(src): "data"::STRING): "events"): "client" AS client,
*
from stage.okta_events
This is the data that is in src (one of many. of course):
{
"eventType": "com.okta.event_hook",
"eventTypeVersion": "1.0",
"cloudEventsVersion": "0.1",
"source": "https://liveopsnation.okta.com/api/v1/eventHooks/whom9y9tu9HQdONDc2p7",
"eventId": "b47d9e73-324e-4f8f-8f28-f6eb17c2fba6",
"data": {
"events": [
{
"uuid": "ee0a60c9-ee38-11ed-95c3-03faaeb30680",
"published": "2023-05-09T07:12:59.759Z",
"eventType": "user.authentication.auth_via_mfa",
"version": "0",
"displayMessage": "Authentication of user via MFA",
"severity": "INFO",
"client": {
"userAgent": {
"rawUserAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
"os": "Windows 10",
"browser": "CHROME"
},
"zone": "null",
"device": "Computer",
"ipAddress": "75.181.198.11",
"geographicalContext": {
"city": "Charlotte",
"state": "North Carolina",
"country": "United States",
"postalCode": "28202",
"geolocation": {
"lat": 35.2316,
"lon": -80.8428
}
},
"ipChain": [
{
"ip": "75.181.198.11",
"geographicalContext": {
"city": "Charlotte",
"state": "North Carolina",
"country": "United States",
"postalCode": "28202",
"geolocation": {
"lat": 35.2316,
"lon": -80.8428
}
},
"version": "V4"
}
]
},
"actor": {
"id": "00uirbgg60V6N7f2T2p7",
"type": "User",
"alternateId": "[email protected]",
"displayName": "GIOVANNI ANIEKAN"
},
"outcome": {
"result": "SUCCESS"
},
"target": [
{
"id": "00uirbgg60V6N7f2T2p7",
"type": "User",
"alternateId": "[email protected]",
"displayName": "GIOVANNI ANIEKAN"
}
],
"transaction": {
"type": "WEB",
"id": "ZFnyez8n1SOXrJWBHFln0AAABlI",
"detail": {}
},
"debugContext": {
"debugData": {
"deviceFingerprint": "62f6b5f088533b8f0c043d332faf6e53",
"behaviors": "{New Geo-Location=NEGATIVE, New Device=NEGATIVE, New IP=NEGATIVE, New State=NEGATIVE, New Country=NEGATIVE, Velocity=NEGATIVE, New City=NEGATIVE}",
"promptingPolicyTypes": "[OKTA_SIGN_ON]",
"requestUri": "/api/v1/authn/factors/dsfj6rbwkd9c37tPm2p7/verify",
"targetEventHookIds": "whom9y9tu9HQdONDc2p7",
"url": "/api/v1/authn/factors/dsfj6rbwkd9c37tPm2p7/verify?",
"authnRequestId": "ZFnydnCxDN2eai4HnmnenQAAA_k",
"requestId": "ZFnyez8n1SOXrJWBHFln0AAABlI",
"dtHash": "33327866d72a3400752bef8b0acaec05c4d6f1425408f5cfc546c39902f710db",
"risk": "{level=LOW}",
"threatSuspected": "false",
"factor": "DUO_SECURITY"
}
},
"legacyEventType": "core.user.factor.attempt_success",
"authenticationContext": {
"authenticationProvider": "FACTOR_PROVIDER",
"authenticationStep": 0,
"externalSessionId": "102LoR0A7c4QLKGDxZjMwEe-A"
},
"securityContext": {
"asNumber": 11426,
"asOrg": "charter communications inc",
"isp": "charter communications inc",
"domain": "spectrum.com",
"isProxy": false
}
}
]
},
"eventTime": "2023-05-09T07:13:07.213Z",
"contentType": "application/json"
}
What I get is actual data for eventtype and events, but nulls for actor, actor2, and client.
I’m sure that the problem is with the ‘[‘ and ‘]’ characters in the json. How do I deal with those?
2
Answers
I believe this is a limitation of Snowflake PARSE_JSON().
Try the following:
In other words:
Those brackets represent Arrays in JSON. even if they only have one entry per array, you need to use a parsing syntax that handles arrays. You have a couple options.
This would be the simplest, and will work as long as you know that the events array will only ever have one record
If a record has multiple event records in one json blob (meaning you can’t just grab the "first" one), then you can use this syntax to split each into its own row in the output.
I also took the liberty of cleaning up unnecessary casting and re-parsing from your example.