skip to Main Content

I have a problem with my R dataset.
How can I use the lapply function to calculate the correlation between sales and share price for each ticker symbol – as a quick reference, so to speak? I tried this, but it doesn’t work:

# Intended to return the Sales / Stockprice_quarterly correlation for one
# ticker's rows (`subset_df`). As written it does not do that:
#   * cor(subset_df) receives every column of the subset; in the sample data
#     TickerSymbol and Quarter are text and Sales carries a "$" prefix, so
#     presumably cor() rejects the input as non-numeric (confirm on real data).
#   * image() is a plotting function, and `Sales` / `Stockprice_quarterly`
#     are not defined inside this function, so they are looked up outside it.
my_correlation <- function(subset_df) {
  subset_correlation <- image(cor(subset_df), x=Sales, y=Stockprice_quarterly)
  subset_correlation
}

# Call my_correlation() once per distinct ticker, passing only that ticker's
# rows; `ss` is a list with one element per ticker, in order of first appearance.
ss <- lapply(unique(Nasdaq_100$TickerSymbol), function(ticker) 
  my_correlation(subset(Nasdaq_100, Nasdaq_100$TickerSymbol == ticker)))

This is a sample I created to show the structure of my dataset:

TickerSymbol Quarter Sales Stockprice_quarterly
AMD 31.03.2021 $0.45 502.500
AMD 31.12.2020 $1.47 361.100
AMD 30.09.2020 $0.32 280.700
AMD 30.06.2020 $0.13 377.400
AMD 31.03.2020 $0.14 296.900
AMD 31.12.2019 $0.15 274.800
AMD 30.09.2019 $0.11 561.200
AMD 30.06.2019 $0.03 548.650
AMD 31.03.2019 $0.01 509.977
AAPL 31.03.2021 $1.40 359.038
AAPL 31.12.2020 $1.68 358.514
AAPL 30.09.2020 $0.75 357.991
AAPL 30.06.2020 $0.65 357.467
AAPL 31.03.2020 $0.64 356.944
AAPL 31.12.2019 $1.25 356.421
AAPL 30.09.2019 $0.77 355.897
AAPL 30.06.2019 $0.55 355.374
AAPL 31.03.2019 $0.62 354.851
EBAY 31.03.2021 $0.92 325.020
EBAY 31.12.2020 $1.39 324.496
EBAY 30.09.2020 $0.94 323.973
EBAY 30.06.2020 $1.05 323.449
EBAY 31.03.2020 $4.51 322.926
EBAY 31.12.2019 $0.69 322.403
EBAY 30.09.2019 $0.37 321.879
EBAY 30.06.2019 $0.46 321.356
EBAY 31.03.2019 $0.57 320.833

Thanks in advance for any help!

2

Answers


  1. There is a $ sign in Sales. Maybe Sales was converted to a character vector during data import? You can remove the sign and convert the column to numeric. Here are two possible variations of my_correlation() – one uses subset() and the other uses [.

    # Remove the "$" prefix from Sales and convert the column to numeric.
    # "$" is a regex metacharacter (end-of-string anchor), so it is matched
    # literally via fixed = TRUE. Writing the pattern as "\$" does not work:
    # that is not a valid escape in an R string and the line fails to parse.
    dat$Sales <- as.numeric(sub("$", "", dat$Sales, fixed = TRUE))
    
    # First variation: pick rows and columns with subset().
    # `ticker_subset` is the ticker to keep; `data` must contain the columns
    # TickerSymbol, Sales and Stockprice_quarterly. Returns the 2 x 2
    # correlation matrix of Sales and Stockprice_quarterly for that ticker.
    my_correlation_1 <- function(ticker_subset, data) {
      ticker_rows <- subset(
        data,
        subset = TickerSymbol == ticker_subset,
        select = c(Sales, Stockprice_quarterly)
      )
      cor(ticker_rows)
    }
    
    # Run the first variation once per ticker and label each result with the
    # ticker it was computed for.
    mycor1 <- setNames(
      lapply(unique(dat$TickerSymbol), my_correlation_1, data = dat),
      unique(dat$TickerSymbol)
    )
    
    # Second variation: pick rows and columns with `[`.
    # `ticker_subset` is the ticker to keep; `data` must contain the columns
    # TickerSymbol, Sales and Stockprice_quarterly. Returns the 2 x 2
    # correlation matrix of Sales and Stockprice_quarterly for that ticker.
    my_correlation_2 <- function(ticker_subset, data) {
      is_ticker <- data$TickerSymbol == ticker_subset
      wanted_cols <- c("Sales", "Stockprice_quarterly")
      cor(data[is_ticker, wanted_cols])
    }
    
    # Same per-ticker run with the second variation; each result is labelled
    # with its ticker, then the named list is printed.
    mycor2 <- setNames(
      lapply(unique(dat$TickerSymbol), my_correlation_2, data = dat),
      unique(dat$TickerSymbol)
    )
    
    mycor2
    
    # $AMD
    #                           Sales Stockprice_quarterly
    # Sales                 1.0000000           -0.2261417
    # Stockprice_quarterly -0.2261417            1.0000000
    # 
    # $AAPL
    #                          Sales Stockprice_quarterly
    # Sales                1.0000000            0.6531391
    # Stockprice_quarterly 0.6531391            1.0000000
    # 
    # $EBAY
    #                          Sales Stockprice_quarterly
    # Sales                1.0000000            0.2032839
    # Stockprice_quarterly 0.2032839            1.0000000
    

    Data:

    # Sample data: nine quarters for each of three tickers. Sales is stored as
    # text including its "$" prefix, exactly as in the question.
    dat <- data.frame(
      TickerSymbol = rep(c("AMD", "AAPL", "EBAY"), each = 9L),
      Quarter = rep(
        c(
          "31.03.2021", "31.12.2020", "30.09.2020", "30.06.2020", "31.03.2020",
          "31.12.2019", "30.09.2019", "30.06.2019", "31.03.2019"
        ),
        times = 3L
      ),
      Sales = c(
        # AMD
        "$0.45", "$1.47", "$0.32", "$0.13", "$0.14", "$0.15", "$0.11", "$0.03",
        "$0.01",
        # AAPL
        "$1.40", "$1.68", "$0.75", "$0.65", "$0.64", "$1.25", "$0.77", "$0.55",
        "$0.62",
        # EBAY
        "$0.92", "$1.39", "$0.94", "$1.05", "$4.51", "$0.69", "$0.37", "$0.46",
        "$0.57"
      ),
      Stockprice_quarterly = c(
        # AMD
        502.5, 361.1, 280.7, 377.4, 296.9, 274.8, 561.2, 548.65, 509.977,
        # AAPL
        359.038, 358.514, 357.991, 357.467, 356.944, 356.421, 355.897, 355.374,
        354.851,
        # EBAY
        325.02, 324.496, 323.973, 323.449, 322.926, 322.403, 321.879, 321.356,
        320.833
      ),
      stringsAsFactors = FALSE
    )
    
    Login or Signup to reply.
  2. A tidyverse approach:

    library(tidyverse)

    # Per-ticker correlation matrix of Sales and Stockprice_quarterly.
    # `df` is the data frame with the columns shown in the question.
    #
    # split() names every list element after the ticker of the rows it holds,
    # so a label cannot end up on another ticker's result. Using group_split()
    # and naming its output with unique(df$TickerSymbol) is not safe: the
    # groups come back sorted by key (AAPL, AMD, EBAY) while unique() keeps
    # the order of first appearance (AMD, AAPL, EBAY), which swaps the labels.
    df %>%
      mutate(Sales = parse_number(Sales)) %>%
      split(.$TickerSymbol) %>%
      map(~ cor(select(.data = .x, Sales, Stockprice_quarterly)))

    # $AAPL
    #                          Sales Stockprice_quarterly
    # Sales                1.0000000            0.6531391
    # Stockprice_quarterly 0.6531391            1.0000000
    #
    # $AMD
    #                           Sales Stockprice_quarterly
    # Sales                 1.0000000           -0.2261417
    # Stockprice_quarterly -0.2261417            1.0000000
    #
    # $EBAY
    #                          Sales Stockprice_quarterly
    # Sales                1.0000000            0.2032839
    # Stockprice_quarterly 0.2032839            1.0000000
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search