skip to Main Content

I’m working on AWS Lambda with Go. I’m deploying Go code that uses chromedp as a Docker image, and I get a "websocket url timeout reached" error. My Lambda is configured with 3008 MB of memory, 512 MB of storage, and a 15-minute timeout. Can you tell me what is wrong and how to fix it? Here are main.go and the Dockerfile.

File main.go (chromedp part)

// getPage configures a chromedp exec allocator and browser context, then
// registers a watcher for a new target whose URL contains "/timetable/web/".
//
// NOTE: this is an excerpt ("chromedp part"); htmlContent and ch are presumably
// consumed by the remainder of the function, which is not shown here.
func getPage(URL string, lineNum string, stationNm string) {
    // settings for crawling
    opts := []chromedp.ExecAllocatorOption{
        chromedp.DisableGPU,
        chromedp.NoSandbox,
        chromedp.Headless,
        chromedp.Flag("no-zygote", true),
        chromedp.Flag("single-process", true),
        // Point Chrome's writable paths at /tmp.
        chromedp.Flag("homedir", "/tmp"),
        chromedp.Flag("data-path", "/tmp/data-path"),
        chromedp.Flag("disk-cache-dir", "/tmp/cache-dir"),
        chromedp.Flag("remote-debugging-port", "9222"),
        chromedp.Flag("remote-debugging-address", "0.0.0.0"),
        chromedp.Flag("disable-dev-shm-usage", true),
    }

    // Derive the allocator directly from context.Background(). Wrapping it in an
    // extra chromedp.NewContext first only creates a browser context that is
    // never run.
    allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
    defer cancelAlloc()

    ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
    defer cancel()

    var htmlContent string

    // ch receives the ID of the first new target whose URL matches.
    ch := chromedp.WaitNewTarget(ctx, func(i *target.Info) bool {
        return strings.Contains(i.URL, "/timetable/web/")
    })

}

File Dockerfile

# Stage 1: build the Go binary on the Lambda "provided" base image.
FROM public.ecr.aws/lambda/provided:al2 AS build

# One ENV instruction spanning several lines; each line but the last needs a
# trailing backslash, otherwise the following lines are parsed as instructions.
ENV GO111MODULE=on \
    CGO_ENABLED=0 \
    GOOS=linux \
    GOARCH=amd64

# Get rid of the extension warning
RUN mkdir -p /opt/extensions
RUN yum -y install golang
RUN go env -w GOPROXY=direct

# Clone git, copying go.mod, go.sum, main.go
WORKDIR /var/task/
RUN yum install git -y
RUN git clone https://github.com/seedspirit/NaverCrawler-CICD-go.git
RUN cp NaverCrawler-CICD-go/main.go /var/task/
RUN cp NaverCrawler-CICD-go/go.mod /var/task/
RUN cp NaverCrawler-CICD-go/go.sum /var/task/

# cache dependencies
RUN go mod download
RUN go build -o main .

# Stage 2: runtime image containing only the compiled binary plus Chrome.
FROM public.ecr.aws/lambda/provided:al2
COPY --from=build /var/task/main /var/task/main

# Install Chrome dependencies
RUN curl https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm -o chrome.rpm && \
    yum install -y ./chrome.rpm && \
    yum install -y fontconfig libX11 GConf2 dbus-x11

ENTRYPOINT ["/var/task/main"]

2

Answers


  1. Chosen as BEST ANSWER

    For people who come here: I solved it this way.

    Dockerfile

    # Stage 1: build a static Linux/amd64 binary.
    FROM golang:1.20.4-alpine3.17 AS builder
    
    # One ENV instruction spanning several lines; each line but the last needs a
    # trailing backslash to continue the instruction.
    ENV GO111MODULE=on \
        CGO_ENABLED=0 \
        GOOS=linux \
        GOARCH=amd64
    
    WORKDIR /app
    
    # CA certificates are copied into the runtime image further down.
    RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/*
    
    COPY go.mod go.sum main.go ./
    RUN go mod download
    
    COPY . .
    
    RUN go build -o main
    
    # Stage 2: runtime image based on chromedp's headless-shell.
    FROM chromedp/headless-shell:113.0.5672.93
    
    WORKDIR /app
    
    COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
    
    COPY --from=builder /app/main .
    
    ENTRYPOINT [ "./main" ]
    

    function

    // getPage navigates to URL, waits for ".end_footer_area" to become visible and
    // clicks "button". It then attaches to the new target whose URL contains
    // "/timetable/web/", reads the outer HTML of ".schedule_wrap" and passes it,
    // together with lineNum and stationNm, to crawler.
    //
    // A failure of the first run is handed to checkErr; a failure of the second
    // run panics.
    func getPage(URL string, lineNum string, stationNm string) {
        // settings for crawling
        opts := append(chromedp.DefaultExecAllocatorOptions[:],
            chromedp.NoSandbox,
            chromedp.Flag("disable-setuid-sandbox", true),
            chromedp.Flag("disable-dev-shm-usage", true),
            chromedp.Flag("single-process", true),
            chromedp.Flag("no-zygote", true),
        )
    
        // Keep the allocator's cancel func and always call it; discarding it
        // leaks the allocator context (go vet: lostcancel).
        allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
        defer cancelAlloc()
    
        ctx, cancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
        defer cancel()
    
        var htmlContent string
    
        // Register the watcher before clicking so the new target is not missed.
        ch := chromedp.WaitNewTarget(ctx, func(i *target.Info) bool {
            return strings.Contains(i.URL, "/timetable/web/")
        })
    
        err := chromedp.Run(ctx,
            chromedp.Navigate(URL),
            chromedp.WaitVisible(".end_footer_area"),
            chromedp.Click("button"),
        )
        checkErr(err)
    
        // Attach a second context to the target reported by the watcher.
        tabCtx, cancelTab := chromedp.NewContext(ctx, chromedp.WithTargetID(<-ch))
        defer cancelTab()
        if err := chromedp.Run(tabCtx,
            chromedp.WaitReady(".table_schedule", chromedp.ByQuery),
            chromedp.OuterHTML(".schedule_wrap", &htmlContent, chromedp.ByQuery),
        ); err != nil {
            panic(err)
        }
    
        crawler(htmlContent, lineNum, stationNm)
    }
    

  2. It’s recommended to use chromedp/headless-shell because it’s small and more suitable for AWS Lambda.

    I just tested a simple demo with chromedp/headless-shell, and it works.

    Dockerfile:

    # Stage 1: compile the Go binary.
    FROM golang:1.20.4-alpine3.17 AS build
    
    WORKDIR /app
    
    # Copy the module files on their own first, then download dependencies,
    # before the rest of the source is added.
    COPY go.mod go.sum ./
    RUN go mod download
    
    COPY . .
    
    RUN go build -o main .
    
    # Stage 2: runtime image; only the compiled binary is carried over.
    FROM chromedp/headless-shell:113.0.5672.93
    
    WORKDIR /app
    
    COPY --from=build /app/main .
    
    ENTRYPOINT ["./main"]
    

    main.go:

    package main
    
    import (
        "context"
        "encoding/json"
        "fmt"
        "log"
        "os"
    
        "github.com/aws/aws-lambda-go/lambda"
        "github.com/chromedp/chromedp"
    )
    
    // Handler runs Chrome through chromedp, navigates to https://example.com/ and
    // prints the text of the "body > div > p:nth-child(2)" element to stdout.
    //
    // ctx is the parent of the browser contexts, so cancelling it (or reaching
    // its deadline) stops the run. The event payload is ignored.
    //
    // A chromedp failure is returned to the caller rather than ending the
    // process with log.Fatal, so the deferred cancel funcs still run.
    func Handler(ctx context.Context, _ json.RawMessage) error {
        opts := []chromedp.ExecAllocatorOption{
            chromedp.NoSandbox,
            chromedp.Flag("disable-setuid-sandbox", true),
            chromedp.Flag("disable-dev-shm-usage", true),
            chromedp.Flag("single-process", true),
            chromedp.Flag("no-zygote", true),
        }
        allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...)
        defer cancelAlloc()
    
        taskCtx, cancelTask := chromedp.NewContext(allocCtx, chromedp.WithDebugf(log.Printf))
        defer cancelTask()
    
        var content string
        if err := chromedp.Run(taskCtx, chromedp.Tasks{
            chromedp.Navigate("https://example.com/"),
            chromedp.Text("body > div > p:nth-child(2)", &content),
        }); err != nil {
            return fmt.Errorf("reading paragraph from example.com: %w", err)
        }
        fmt.Println(content)
        return nil
    }
    
    // main hands Handler to the Lambda runtime when AWS_LAMBDA_RUNTIME_API is set.
    // Otherwise it calls Handler once directly with a background context and a
    // nil payload, and exits via log.Fatal if Handler returns an error.
    func main() {
        _, onLambda := os.LookupEnv("AWS_LAMBDA_RUNTIME_API")
        if onLambda {
            lambda.Start(Handler)
            return
        }
    
        if err := Handler(context.Background(), nil); err != nil {
            log.Fatal(err)
        }
    }
    

    This example is based on https://github.com/Andiedie/chromedp-aws-lambda-example. Please note that the list of chromedp.ExecAllocatorOption values above is copied directly from that repository. It works, but I’m not sure whether it is the best set of options; you may have to adjust them to your needs.

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search