skip to Main Content

I’m trying to upload files to S3 using API Gateway and Lambda. Everything works fine until the request reaches the Lambda function, which looks like this:

import base64
import boto3
import os

s3_client = boto3.client('s3')
bucket_name = os.environ['S3_BUCKET_NAME']


def lambda_handler(event, context):
    """Base64-decode the request body and store the result in the S3 bucket.

    Args:
        event: Invocation payload; only ``event['body']`` is read here.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200 and a fixed confirmation message.
    """
    # b64decode raises ValueError when the body is a str containing non-ASCII
    # characters, i.e. when the body is not base64 text.
    contend_decode = base64.b64decode(event['body'])
    # NOTE(review): no ``Key`` is passed; put_object appears to require one - confirm.
    response = s3_client.put_object(Bucket=bucket_name, Body=contend_decode)
    print(response)

    return {
        'statusCode': 200,
        'body': 'File uploaded'
    }

When I upload for example an mp3 file I receive an error that says:

[ERROR] ValueError: string argument should contain only ASCII characters
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 10, in lambda_handler
    contend_decode = base64.b64decode(event['body'])
  File "/var/lang/lib/python3.8/base64.py", line 80, in b64decode
    s = _bytes_from_decode_data(s)
  File "/var/lang/lib/python3.8/base64.py", line 39, in _bytes_from_decode_data
    raise ValueError('string argument should contain only ASCII characters')
[ERROR] ValueError: string argument should contain only ASCII characters Traceback (most recent call last):   File "/var/task/lambda_function.py", line 10, in lambda_handler     contend_decode = base64.b64decode(event['body'])   File "/var/lang/lib/python3.8/base64.py", line 80, in b64decode     s = _bytes_from_decode_data(s)   File "/var/lang/lib/python3.8/base64.py", line 39, in _bytes_from_decode_data     raise ValueError('string argument should contain only ASCII characters')

Any idea about this issue, please?

Edit:

The content of the event is something like this:

{
    "resource": "/upload",
    "path": "/upload",
    "httpMethod": "POST",
    "headers": {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "CloudFront-Forwarded-Proto": "https",
        "CloudFront-Is-Desktop-Viewer": "true",
        "CloudFront-Is-Mobile-Viewer": "false",
        "CloudFront-Is-SmartTV-Viewer": "false",
        "CloudFront-Is-Tablet-Viewer": "false",
        "CloudFront-Viewer-ASN": "5410",
        "CloudFront-Viewer-Country": "FR",
        "Content-Type": "audio/mpeg",
        "Host": "um8xxxxpxx.execute-api.eu-west-1.amazonaws.com",
        "Postman-Token": "fe49e15f-82c6-44c7-8399-4b6fba9b9abc",
        "User-Agent": "PostmanRuntime/7.29.2",
        "Via": "1.1 12bc6711250373a4xxxxxxxxxx44504.cloudfront.net (CloudFront)",
        "X-Amz-Cf-Id": "5Zv2MVCxxxxxxxxxxxxyzMuv_CfIAxxxxxxxxxxxxJyz4JtHb-QImYZGQ==",
        "X-Amzn-Trace-Id": "Root=1-6383d306-4e81300e0000000c3262b7a45",
        "x-api-key": "g4KOPDl5zoB0E2QBpAAXSaESDFyGkR38f000",
        "X-Forwarded-For": "1XX.XX9.2XX.XX9, 1XX.XX6.XX5.XXX",
        "X-Forwarded-Port": "443",
        "X-Forwarded-Proto": "https"
    },
    "multiValueHeaders": {
        "Accept": [
            "*/*"
        ],
        "Accept-Encoding": [
            "gzip, deflate, br"
        ],
        "CloudFront-Forwarded-Proto": [
            "https"
        ],
        "CloudFront-Is-Desktop-Viewer": [
            "true"
        ],
        "CloudFront-Is-Mobile-Viewer": [
            "false"
        ],
        "CloudFront-Is-SmartTV-Viewer": [
            "false"
        ],
        "CloudFront-Is-Tablet-Viewer": [
            "false"
        ],
        "CloudFront-Viewer-ASN": [
            "5410"
        ],
        "CloudFront-Viewer-Country": [
            "FR"
        ],
        "Content-Type": [
            "audio/mpeg"
        ],
        "Host": [
            "um8xxxxpxx.execute-api.eu-west-1.amazonaws.com"
        ],
        "Postman-Token": [
            "fDDDDf-82c8-44c9-DDD1-4b6f9QASFF9abc"
        ],
        "User-Agent": [
            "PostmanRuntime/7.29.2"
        ],
        "Via": [
            "1.1 12bVASD16aeca2DDD44504.cloudfront.net (CloudFront)"
        ],
        "X-Amz-Cf-Id": [
            "5Zv2MVCnaDDDzMuv_CfIA6iC89CiUnjDDDAZXAb-QImYZGQ=="
        ],
        "X-Amzn-Trace-Id": [
            "Root=1-6383AZDD-4e81002e022374c326hu8a45"
        ],
        "x-api-key": [
            "g4KOPDl5zoBia3cT4pYMkynzyGkX00aa"
        ],
        "X-Forwarded-For": [
            "1XX.XX9.2XX.XX9, 1XX.XX6.XX5.XXX"
        ],
        "X-Forwarded-Port": [
            "443"
        ],
        "X-Forwarded-Proto": [
            "https"
        ]
    },
    "queryStringParameters": "None",
    "multiValueQueryStringParameters": "None",
    "pathParameters": "None",
    "stageVariables": "None",
    "requestContext": {
        "resourceId": "adddazq",
        "resourcePath": "/upload",
        "httpMethod": "POST",
        "extendedRequestId": "cR3o-zddEFgazz=",
        "requestTime": "27/Nov/2022:21:13:42 +0000",
        "path": "/dev/upload",
        "accountId": "114782879802",
        "protocol": "HTTP/1.1",
        "stage": "dev",
        "domainPrefix": "ua8xjwxraf",
        "requestTimeEpoch": 1669583622098,
        "requestId": "23e099f9-eda4-42b2-8b4f-b1aaea589978",
        "identity": {
            "cognitoIdentityPoolId": "None",
            "cognitoIdentityId": "None",
            "apiKey": "h4KOPDl5zoqsdT4pYMkynzdddaz8f95560",
            "principalOrgId": "None",
            "cognitoAuthenticationType": "None",
            "userArn": "None",
            "apiKeyId": "z887qsddox4",
            "userAgent": "PostmanRuntime/7.29.2",
            "accountId": "None",
            "caller": "None",
            "sourceIp": "176.139.21.129",
            "accessKey": "None",
            "cognitoAuthenticationProvider": "None",
            "user": "None"
        },
        "domainName": "um8xxxxpxx.execute-api.eu-west-1.amazonaws.com",
        "apiId": "um8xxxxpxx"
    },
    "body": "x04x08-P�x10,Gh�mx0cx06K����Te�U�-��rx01�Y��l�,3�x11�Q�4$�........6��x1872Ip�d�px1d�M�PX�0`�x�0����d�x0f�x0c.ǃ��x12x00x00r x00x00x01x18��........",
    "isBase64Encoded": "False"
}

Note: I put just a little bit of characters that exist in the body, just for demonstration purpose.

2

Answers


  1. The error is:

    ValueError: string argument should contain only ASCII characters

    The error is on this line:

    contend_decode = base64.b64decode(event['body'])
    

    So, it is saying that event['body'] does not contain base64 encoded data.

    The binary content will actually be provided in the content parameter.

    Therefore, the line should instead be:

    contend_decode = base64.b64decode(event['content'])
    
    Login or Signup to reply.
  2. The Error And A Bunch Of Computer Science

    So I still think that John Rotenstein’s answer is objectively correct, i.e. the problem is that you can’t base64-decode event['body'] into bytes, because it’s a string of raw byte data that contains non-ASCII characters, and that’s why it is throwing an error.

    If you look at event['body'] you should be able to maybe piece that much together:

    "x04x08-P�x10,Gh�mx0cx06K����Te�U�-��rx01�Y��l�,3�x11�Q�4$�........6��x1872Ip�d�px1d�M�PX�0`�x�0����d�x0f�x0c.ǃ��x12x00x00r x00x00x01x18��........"
    

    Notice that its not throwing a padding error, which occurs when the string is not the right length (typically because of the trailing "="). You’d use decode on a base64 string (eg "TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsu" – stolen from wikipedia) to turn it into bytes.

    Free tid bit of information:

    • Run base64.b64decode("TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsu") to get the string back as a byte string (a = b"Many hands make light work.").
    • Convert it to a list by doing b = list(a) -> [77, 97, 110, 121, 32, 104, 97, 110, 100, 115, 32, 109, 97, 107, 101, 32, 108, 105, 103, 104, 116, 32, 119, 111, 114, 107, 46].
    • Then to its hex representation (I had to format it in notepad afterwards) "".join([hex(c).replace("0x", "\\x") for c in b]) -> \x4d\x61\x6e\x79\x20\x68\x61\x6e\x64\x73\x20\x6d\x61\x6b\x65\x20\x6c\x69\x67\x68\x74\x20\x77\x6f\x72\x6b\x2e.
    • The disconnect for me is that with open(filename, "rb") as f: a = f.read() will return something like what you have in your event['body'] if it’s an image or something of the sort, so you’d assume that b"hello world" would also be bytes similar to that of the with open()..., but apparently not(?). I don’t know; a lot to unpack.

    If you’re unfamiliar with what is in your event['body'], this string is actually decoded bytes – granted this is slightly ambiguous, because the \x is actually an escape sequence for hex in Python, but there are some very easily reproducible examples where this doesn’t seem to be the case (take your event['body'] for instance – what even is this "x1872Ip�d�p"). You can get decoded bytes from doing something like the below, with the caveat that it was cast to a string, so it’s no longer a bytes-like object – it’s a string:

    a = "hello world"
    # str -> bytes using an explicit encoding
    b = a.encode("utf-8")
    # or
    c = bytes(a, "utf-8")
    # or - a bytes literal; it may only contain ASCII characters, so no encoding is chosen here
    a = b"hello world"
    
    # the closest I could get to a hex representation of the original str was this
    # (run it against the str, not the bytes literal - iterating bytes yields ints):
    # "".join([hex(ord(c)).replace("0x", "\\x") for c in a])
    

    Thing is, I don’t know what encoding it was using to decode it into bytes, and it’s unclear whether I can expect body to be bytes every time or whether it would be a base64 string, as isBase64Encoded might lead me to believe. I’m not 100% certain, but my assumption is that if you do something like the below, granted the resulting decoded string may not be base64, you can get a base64 string output:

    Quick Edit – I believe I misunderstood what isBase64Encoded means. After the writing of this, I think it should be understood as "are the bytes encoded as base64? True or False.", I will edit the below code. Additionally, I will assume that the data for event['body'] underwent one of two processes: either opened as bytes -> isBase64Encoded set as False -> sent or opened as bytes -> b64encoded -> converted to bytes -> isBase64Encoded set as True -> sent. From here on out in this answer, you will see me refer to the answer before this edit as pre edit and after this edit as post edit.

    import base64
    # pre edit (superseded by the "post edit" code below; kept for reference)
    if not event['isBase64Encoded']:
        # NOTE(review): `body` is unquoted, so it is looked up as a variable that this snippet
        # never defines; "whatever that encoding is" is a placeholder, not a real codec name.
        event['body'] = bytes(event[body], "whatever that encoding is").decode()
        # b64encode takes a bytes-like object and returns the base64 text as bytes.
        # b64decode takes base64 text (bytes or an ASCII str) and returns the decoded bytes.
        # NOTE(review): this branch runs only when the body is flagged as NOT base64, so the
        # decode below is applied to data that is not base64 text.
        event['body'] = base64.b64decode(event['body'])
    print(event['body'])
    
    # post edit
    # isBase64Encoded says whether body holds base64 text or ordinary text;
    # either way event['body'] ends up as a bytes object.
    if event['isBase64Encoded']:
        # body is base64 text: b64decode accepts the str directly and returns the raw bytes
        event['body'] = base64.b64decode(event['body'])
    else:
        # body is an ordinary str: encode it to obtain bytes
        # (bytes(some_str) without an encoding raises TypeError, as does BytesIO(some_str))
        event['body'] = event['body'].encode('utf-8')
    

    Pre edit – To be 100% clear as to what this does, this:

    1. Checks if it is not a base64 string
    2. If not, convert body to bytes with an encoding, then to a string with decode()
    3. base64decode() takes that string and if its a base64 string (like from above), and converts it to bytes with a base64 encoding

    Post edit – I’ve included some helpful comments in the code, but either way, it should return bytes.


    Pushing Objects to the Bucket

    However, you seem to also want to push those bytes to a bucket – the docs:

    response = client.put_object(
        # event['body'] should already be bytes by now as per the post edit comments
        Body=event["body"],
        Bucket="my_bucket",
        # ContentEncoding is optional and describes the stored object itself;
        # it is not the request's Accept-Encoding header, so it is left out here.
        # ContentType must be a single string: use "headers", because
        # "multiValueHeaders" holds a list of values for every header.
        ContentType=event["headers"]["Content-Type"],
        Key="my/object/name.mp4"
    )
    

    Pre Edit – So realistically, set all of those key word values and you should be golden – you don’t have to run a base64 decode operation in this instance (based on what was returned in your event – you might if it actually was encoded as a base64 string), just pass put_object() the bytes.

    Post Edit – Still set all the key words and read the below (Content Encoding), but we should’ve handled both cases of isBase64Encoded by now, and the result should be a bytes like object stored in event['body'], so no significant change has to be made to this paragraph in regards to put_object().

    Here is a link to what ContentEncoding is, compared to ContentType, which may shed some light on whether or not you should use it or need to use it.


    What Your Function Might Should Be

    You shouldn’t use such generalized try / except statements like I did below, but if it really bothers you, you can hunt down what those errors throw and add it in yourself or remove them completely, but conceptually, this should be what you want.

    Pre Edit

    import base64
    import boto3
    import os
    
    s3_client = boto3.client('s3')
    bucket_name = os.environ['S3_BUCKET_NAME']
    
    
    def lambda_handler(event, context):
        """Store the body of an API Gateway proxy request as an object in S3.

        Args:
            event: API Gateway Lambda-proxy event. Reads ``body``,
                ``isBase64Encoded`` and ``headers``.
            context: Lambda context object (unused).

        Returns:
            A proxy-integration response dict: ``statusCode`` 200 on success,
            400 when the body cannot be converted to bytes, 500 when the
            upload to S3 fails.
        """
        # Local import: botocore is installed with boto3, which this module already imports.
        from botocore.exceptions import BotoCoreError, ClientError

        is_base64 = event.get('isBase64Encoded', False)
        if isinstance(is_base64, str):
            # A stringified event carries "True"/"False", and any non-empty str is truthy.
            is_base64 = is_base64.strip().lower() == 'true'

        body = event.get('body') or b''
        try:
            if is_base64:
                payload = base64.b64decode(body)
            elif isinstance(body, str):
                # API Gateway base64-encodes only registered binary media types;
                # everything else arrives as text, so encode it back to bytes.
                payload = body.encode('utf-8')
            else:
                payload = bytes(body)
        except (ValueError, TypeError) as err:
            # binascii.Error (bad padding / alphabet) is a ValueError subclass.
            print(f"Could not decode request body: {err}")
            return {
                'statusCode': 400,
                'body': 'Request body could not be decoded.'
            }

        # "multiValueHeaders" holds lists, but put_object needs one string; header
        # names are matched case-insensitively.
        headers = event.get('headers') or {}
        content_type = next(
            (value for name, value in headers.items()
             if name.lower() == 'content-type'),
            'application/octet-stream',
        )

        try:
            response = s3_client.put_object(
                Body=payload,
                Bucket=bucket_name,
                ContentType=content_type,
                # Placeholder key: every upload overwrites this same object.
                Key="my/object/name.mp4"
            )
        except (BotoCoreError, ClientError) as err:
            # 406 means failed content negotiation; a storage failure is a server error.
            print(f"S3 upload failed: {err}")
            return {
                'statusCode': 500,
                'body': 'Failed to store object in S3.'
            }
        else:
            print(response)
            return {
                'statusCode': 200,
                'body': 'File uploaded'
            }
    

    Post Edit

    import base64
    import boto3
    import os
    from io import BytesIO
    
    s3_client = boto3.client('s3')
    bucket_name = os.environ['S3_BUCKET_NAME']
    
    
    def lambda_handler(event, context):
        """Upload the request body of an API Gateway proxy event to S3.

        Args:
            event: API Gateway Lambda-proxy event. Reads ``body``,
                ``isBase64Encoded`` and ``headers``.
            context: Lambda context object (unused).

        Returns:
            A proxy-integration response dict: ``statusCode`` 200 on success,
            400 when the body is flagged as base64 but cannot be decoded,
            500 when the upload to S3 fails.
        """
        # Local import: botocore is installed with boto3, which this module already imports.
        from botocore.exceptions import BotoCoreError, ClientError

        flag = event.get('isBase64Encoded', False)
        if isinstance(flag, str):
            # A stringified event carries "True"/"False", and any non-empty str is truthy.
            flag = flag.strip().lower() == 'true'

        body = event.get('body') or b''
        if flag:
            # The body is base64 text; b64decode accepts the str directly and returns bytes.
            try:
                payload = base64.b64decode(body)
            except (ValueError, TypeError) as err:
                # binascii.Error (bad padding / alphabet) is a ValueError subclass.
                print(f"Body is not valid base64: {err}")
                return {
                    'statusCode': 400,
                    'body': 'Request body is not valid base64.'
                }
        elif isinstance(body, str):
            # The body is ordinary text; bytes(str) without an encoding and
            # BytesIO(str) both raise TypeError, so encode it explicitly.
            payload = body.encode('utf-8')
        else:
            payload = bytes(body)

        # put_object needs a single string; "multiValueHeaders" holds lists.
        content_type = 'application/octet-stream'
        for name, value in (event.get('headers') or {}).items():
            if name.lower() == 'content-type':
                content_type = value
                break

        try:
            response = s3_client.put_object(
                Body=payload,
                Bucket=bucket_name,
                ContentType=content_type,
                # Placeholder key: every upload overwrites this same object.
                Key="my/object/name.mp4"
            )
        except (BotoCoreError, ClientError) as err:
            # 406 means failed content negotiation; a storage failure is a server error.
            print(f"S3 upload failed: {err}")
            return {
                'statusCode': 500,
                'body': 'Failed to store object in S3.'
            }
        else:
            print(response)
            return {
                'statusCode': 200,
                'body': 'File uploaded'
            }
    

    Extra Resources

    How does Base64 work? – wikipedia

    Base64 Encode – docs

    Base64 Decode – docs

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search