skip to Main Content

I want to execute a POST URLRequest, but first I need to create the request body.
The body should look like this:

{
  "model": "gpt-4-vision-preview",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": <mutableStringValue>
        },
        {
          "type": "image_url",
          "image_url": {
            "url": <mutableStringValue>
          }
        }
      ]
    }
  ],
  "max_tokens": 300
}

I cannot hardcode this as a .json file because the parameters "text" (under the first "type") and "image_url" (under the second "type") are given as function parameters.
I don’t know how to handle two "type" keys.

As of now, I came up with something like this:

/// Request body for an image-input chat completion call.
///
/// `model` and `maxTokens` are fixed constants, so the memberwise initializer
/// only takes `messages`.
struct ImageInputRequestBody: Codable {
    let model: String = "gpt-4-vision-preview"
    let messages: [Message]
    let maxTokens: Int = 300

    /// One chat message: the sender's role and its content parts.
    struct Message: Codable {
        let role: String
        let content: [Content]
    }

    /// A content part; only the text variant is modelled here.
    struct Content: Codable {
        let type: String
        let text: String?
    }

    /// Maps `maxTokens` to the snake_case key `max_tokens`.
    enum CodingKeys: String, CodingKey {
        case model
        case messages
        case maxTokens = "max_tokens"
    }
}

But here I’m missing the second "type" and the "image_url" content.

2

Answers


  1. You could try something simple like this:

    /// One entry of a message's `content` array: a text part or an image-URL part.
    ///
    /// Only the payload matching `type` is written when encoding, so a text part
    /// never carries an `image_url` key and vice versa.
    struct Content: Identifiable, Codable {
        /// Local identity for `Identifiable`; not listed in `CodingKeys`, so it is not part of the JSON.
        let id = UUID()
        /// Discriminator string, e.g. "text" or "image_url".
        let type: String
        /// Payload for a text part; `nil` otherwise.
        var text: String?
        /// Payload for an image part; `nil` otherwise.
        var imageUrl: ImgUrl?

        enum CodingKeys: String, CodingKey {
            case type, text
            case imageUrl = "image_url"
        }

        /// Creates a content part.
        /// - Parameters:
        ///   - type: The discriminator. "text" stores `value` in `text`; any other
        ///     value treats `value` as an image URL string.
        ///   - value: The text or the URL string, depending on `type`.
        init(type: String, value: String? = nil) {
            self.type = type

            // `text` and `imageUrl` are optional vars and therefore start out as nil;
            // only the one selected by `type` is filled in.
            if type == "text" {
                text = value
            } else {
                // Optional.map replaces the nil-check plus force-unwrap.
                imageUrl = value.map { ImgUrl(url: $0) }
            }
        }

        /// Encodes `type` followed by the single payload key that matches it.
        /// - Throws: Any error raised by the encoder's keyed container.
        public func encode(to encoder: Encoder) throws {
            var container = encoder.container(keyedBy: CodingKeys.self)
            try container.encode(type, forKey: .type)
            if type == "text" {
                try container.encode(text, forKey: .text)
            } else {
                try container.encode(imageUrl, forKey: .imageUrl)
            }
        }
    }
    
    /// Top-level request body. `model` and `maxTokens` are fixed constants, so the
    /// memberwise initializer only takes `messages`.
    struct ImageInputRequestBody: Codable {
        let model: String = "gpt-4-vision-preview"
        let messages: [Message]
        let maxTokens: Int = 300

        /// Maps `maxTokens` to the snake_case key `max_tokens`.
        enum CodingKeys: String, CodingKey {
            case model
            case messages
            case maxTokens = "max_tokens"
        }
    }
    
    /// A single chat message: the sender's role plus its content parts.
    struct Message: Codable {
        /// Sender role; the example request uses "user".
        let role: String
        /// Content parts of the message, in order.
        let content: [Content]
    }
    
    /// Wrapper object holding an image URL; serialises as `{"url": ...}`.
    struct ImgUrl: Codable {
        /// The image location as a plain string.
        let url: String
    }
    

    Note that you need to add two Content values to the Message object: one for the text and one for the image URL.

    For example:

    /// Builds a sample request body containing one "user" message with a text part
    /// followed by an image-URL part.
    /// - Parameters:
    ///   - text: Value for the text part.
    ///   - url: URL string for the image part.
    /// - Returns: The assembled request body.
    func getTestRequestBody(text: String? = nil, url: String? = nil) -> ImageInputRequestBody {
        let textPart = Content(type: "text", value: text)
        let imagePart = Content(type: "image_url", value: url)
        let userMessage = Message(role: "user", content: [textPart, imagePart])
        return ImageInputRequestBody(messages: [userMessage])
    }
    
    Login or Signup to reply.
  2. As far as I remember, if your task is only to create the JSON mentioned in the question, you can simply build a multiline string and then convert it into Data.

    // Values spliced into the JSON template below. They are inserted verbatim, so
    // they must not contain characters that need JSON escaping (quotes, backslashes,
    // newlines).
    let value1 = "your text"
    let value2 = "your url"
    // `\(...)` is Swift string interpolation; without the backslash the literal
    // text "(value1)" would be sent instead of the variable's contents.
    let text = """
        {
          "model": "gpt-4-vision-preview",
          "messages": [
            {
              "role": "user",
              "content": [
                {
                  "type": "text",
                  "text": "\(value1)"
                },
                {
                  "type": "image_url",
                  "image_url": {
                    "url": "\(value2)"
                  }
                }
              ]
            }
          ],
          "max_tokens": 300
        }
        """
    // Building Data from the UTF-8 view cannot fail, so no force-unwrap is needed.
    let encoded = Data(text.utf8)
    

    Or it can be a raw string literal (#"..."#), to make the code more compact; inside a raw string, interpolation is written \#(value).

    If such JSON may have an arbitrary number of items in "content", I would make an enum with two cases, one for text and one for an image, and implement Encodable's encode(to:) method to convert them properly. For instance:

    /// A content part of a message: plain text or an image URL.
    ///
    /// Encodes as `{"type": "text", "text": ...}` or
    /// `{"type": "image_url", "image_url": {"url": ...}}`.
    enum Content: Encodable {
        case text(String)
        case imageURL(URL)

        private enum CodingKeys: String, CodingKey {
            case type, text
            case imageURL = "image_url"
        }

        private enum ImageURLCodingKeys: CodingKey {
            case url
        }

        /// Value written under the "type" key for each case.
        private var discriminator: String {
            switch self {
            case .text: return "text"
            case .imageURL: return "image_url"
            }
        }

        /// Writes the discriminator first, then the payload for the active case.
        /// - Throws: Any error raised by the encoder's containers.
        func encode(to encoder: Encoder) throws {
            var root = encoder.container(keyedBy: CodingKeys.self)
            try root.encode(discriminator, forKey: .type)

            switch self {
            case .text(let body):
                try root.encode(body, forKey: .text)
            case .imageURL(let link):
                // The URL sits one level down, inside the "image_url" object.
                var payload = root.nestedContainer(keyedBy: ImageURLCodingKeys.self, forKey: .imageURL)
                try payload.encode(link.absoluteString, forKey: .url)
            }
        }
    }
    

    I also wanted to offer a more object-oriented approach, but because Apple made encode(_:) a generic method, JSONEncoder cannot encode an array of mixed Encodable values directly; to run the following code as written, you would need a different JSON encoder:

    /// A text content part; encodes as `{"type": "text", "text": ...}`.
    struct TextContent: Encodable {
        let text: String

        private enum CodingKeys: String, CodingKey {
            case type, text
        }

        /// Writes the fixed "text" discriminator followed by the text itself.
        /// - Throws: Any error raised by the encoder's keyed container.
        func encode(to encoder: Encoder) throws {
            var fields = encoder.container(keyedBy: CodingKeys.self)
            try fields.encode("text", forKey: .type)
            try fields.encode(text, forKey: .text)
        }
    }
    
    /// An image content part; encodes as
    /// `{"type": "image_url", "image_url": {"url": ...}}`.
    struct ImageURLContent: Encodable {
        let url: URL

        private enum CodingKeys: String, CodingKey {
            case type
            case imageURL = "image_url"
        }

        private enum ImageURLCodingKeys: CodingKey {
            case url
        }

        /// Writes the fixed "image_url" discriminator, then the nested URL object.
        /// - Throws: Any error raised by the encoder's containers.
        func encode(to encoder: Encoder) throws {
            var fields = encoder.container(keyedBy: CodingKeys.self)
            try fields.encode("image_url", forKey: .type)

            // The URL string sits one level down, inside the "image_url" object.
            var imageFields = fields.nestedContainer(keyedBy: ImageURLCodingKeys.self, forKey: .imageURL)
            try imageFields.encode(url.absoluteString, forKey: .url)
        }
    }
    
    // A heterogeneous list of content parts, stored as Encodable existentials.
    let content: [Encodable] = [
        TextContent(text: "your text"),
        ImageURLContent(url: URL(string: "https://test.com")!)
    ]
    // `SomeThirdPartyEncoder` is a placeholder name; no such type is defined in this snippet.
    // NOTE(review): wrapping each element in a small type-erasing Encodable struct
    // would presumably let JSONEncoder handle this array too; verify before relying on it.
    let data = SomeThirdPartyEncoder().encode(content) // JSONEncoder doesn't work with such an array, unfortunately
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search