skip to Main Content

The data acquired by this code is not being written out in proper CSV format.

import requests
import csv

def Download_data():
    """Download the NIFTY 50 CSV export from nseindia.com and write it to ``___N50__.csv``.

    ``raise_for_status()`` is called after each of the two requests, so an
    HTTP error status aborts the download before anything is written.
    """
    s = requests.Session()
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'}
    s.headers.update(headers)
    # Visit the regular market page first, using the same session.
    # NOTE(review): presumably this collects cookies the API call below needs — confirm.
    resp = s.get('https://www.nseindia.com/market-data/live-equity-market')
    resp.raise_for_status()
    resp = s.get('https://www.nseindia.com/api/equity-stockIndices?csv=true&index=NIFTY%2050')
    resp.raise_for_status()
    # Redundant assignment: the value is overwritten by the next statement.
    data_79 = resp.text
    # Deletes every `","` (quote, comma, quote) sequence, i.e. the delimiter
    # between two adjacent quoted fields, so those fields are fused together.
    data_79 = resp.text.replace('","', '')
    with open('___N50__.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Plain split on ',' is not quote-aware: a comma inside a value also
        # splits that value into separate cells.
        writer.writerows([line.split(',') for line in data_79.splitlines()])

if __name__ == "__main__":
    # Run the download only when the file is executed as a script.
    # The function defined in this snippet is Download_data().
    Download_data()

The header fields each end up on separate rows instead of forming a single header row.

I arrived at this code after a lot of learning, but my knowledge is still insufficient. Kindly help!

3

Answers


  1. If the URL you are accessing serves a valid CSV file, you can read it directly into a pandas DataFrame and then save it to your local machine as follows:

    import pandas as pd
    import io
    import requests
    # Fetch the remote CSV and stop on an HTTP error status, so an error page
    # is never parsed as if it were CSV data.
    url = "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv"
    response = requests.get(url)
    response.raise_for_status()
    s = response.content
    # Decode the raw bytes explicitly as UTF-8 and let pandas parse the text.
    c = pd.read_csv(io.StringIO(s.decode('utf-8')))
    # Raw string, so the backslashes in the Windows path are taken literally.
    c.to_csv(r"C:\users\123\Downloads\countries.csv")
    
    Login or Signup to reply.
  2. Did you take a look at the data with a text editor? resp.text seems to start with a BOM, and each field of the header ends with a linefeed. IMHO the data needs some cleaning:

        # ... (fragment: uses the `resp` response object from the code above and needs `import re`)

        # Strip the BOM: replace everything before the first double quote at the
        # very start of the text (plus that quote) with a single quote character.
        data_79 = re.sub(r'\A[^"]+"', '"', resp.text, count=1)

        # Strip unwanted linefeeds: drop a newline unless the character right
        # before it is a double quote. r'\1' puts the captured character back.
        data_79 = re.sub(r'([^"])\n', r'\1', data_79)

        # Save the cleaned text unchanged; it is written as-is, without csv.writer.
        with open('___N50__.csv', 'w', newline='') as file:
            file.write(data_79)
    
    Login or Signup to reply.
  3. One possibility is to skip the header rows and explicitly add them:

    from io import StringIO
    
    import pandas as pd
    import requests
    
    
    def download_data(output_path="/path/to/file/N50.csv"):
        """Download the NIFTY 50 CSV export and save it with explicit column names.

        The regular market page is requested first with the same session, then
        the CSV endpoint. The first 13 lines of the response are skipped and the
        column names below are applied instead.

        Args:
            output_path: Where the cleaned CSV is written. Defaults to the
                placeholder path used in the original example.

        Raises:
            requests.HTTPError: If the CSV endpoint answers with an error status.
        """
        url = "https://www.nseindia.com/api/equity-stockIndices?csv=true&index=NIFTY%2050"
        headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36"}
        column_names = [
            "SYMBOL", "OPEN", "HIGH", "LOW", "PREV. CLOSE", "LTP",
            "CHNG", "%CHNG", "VOLUME", "52W H", "52W L",
            "30 D %CHNG", "365 D % CHNG", "TODAY",
        ]

        with requests.Session() as session:
            # Result of the first request is intentionally unused; only the
            # session state it leaves behind matters for the second call.
            session.get(url="https://www.nseindia.com/market-data/live-equity-market", headers=headers)
            response = session.get(url=url, headers=headers)
        # Raise on an HTTP error status instead of printing the None that
        # raise_for_status() returns and then parsing the body anyway.
        response.raise_for_status()

        df = pd.read_csv(StringIO(response.text), skiprows=13, header=None, names=column_names)
        df.to_csv(output_path, index=False)
    
    
    # Run the download only when this file is executed as a script.
    if __name__ == "__main__":
        download_data()
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search