skip to Main Content

I am using Telethon to scrape the member list of a Telegram group chat and then save that data to a Google Sheet. This code works fine with small groups/channels (under ~2k members/subscribers).

from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty

from google.oauth2 import service_account    # import for google sheets API
from googleapiclient.discovery import build

# Google Sheets API settings: the OAuth scope requested and the
# service-account key file the credentials are loaded from.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
SERVICE_ACCOUNT_FILE = 'key.json'
cred = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE,
    scopes=SCOPES,
)

class Scraper():
    """Scrape the member list of a Telegram megagroup into a Google Sheet.

    Intended call order: ``connect()`` -> ``getGroups()`` -> ``saveToSheet()``.
    """

    # Google Sheets settings used by saveToSheet().
    SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
    SERVICE_ACCOUNT_FILE = "key.json"
    # The ID and range of the target spreadsheet.
    SPREADSHEET_ID = "<MY-SHEET-ID>"  # epicwar group
    RANGE = "Sheet1!A2:C"
    # Number of dialogs requested from Telegram by getGroups().
    DIALOG_LIMIT = 10

    def __init__(self):
        """Store the API credentials and create the (not yet connected) client."""
        # Enter 7-digit Telegram API ID
        self.api_id = 1234567
        # Enter 32-character API Hash
        self.api_hash = "randomrandomrandom"
        # Enter mobile number with country code
        self.phone = "myphonenumber"

        self.client = TelegramClient(self.phone, self.api_id, self.api_hash)
        # Megagroups found by getGroups(); saveToSheet() picks one by index.
        self.groups = []

    def connect(self):
        """Connect to Telegram, logging in interactively if necessary.

        If the user is not yet authorized, an OTP code request is sent and
        the user is prompted for the code they received on their Telegram
        account. After logging in, a *.session file will be created. This is
        a database file which makes your session persistent.
        """
        self.client.connect()
        if not self.client.is_user_authorized():
            self.client.send_code_request(self.phone)
            self.client.sign_in(self.phone, input("Enter OTP code: "))

    def getGroups(self):
        """Find the megagroups among the user's dialogs and print a menu.

        Requests up to ``DIALOG_LIMIT`` dialogs (empty offset values, so no
        filtering is applied), appends every chat flagged as a megagroup to
        ``self.groups`` and prints each entry of ``self.groups`` with its
        index so the user can choose one in ``saveToSheet()``.
        """
        result = self.client(GetDialogsRequest(
            offset_date=None,
            offset_id=0,
            offset_peer=InputPeerEmpty(),
            limit=self.DIALOG_LIMIT,
            hash=0,
        ))

        # Some chat objects carry no ``megagroup`` attribute; treat a missing
        # flag as "not a megagroup" instead of swallowing every exception.
        self.groups.extend(
            chat for chat in result.chats
            if getattr(chat, "megagroup", False)
        )

        # Choose which group to scrape members from
        for i, g in enumerate(self.groups):
            print(f"{i}- {g.title}")

    @staticmethod
    def _member_row(user):
        """Return ``[id, username, full name]`` for one participant.

        Missing username / first name / last name become empty strings, and
        the full name is stripped of surrounding whitespace.
        """
        full_name = f"{user.first_name or ''} {user.last_name or ''}".strip()
        return [user.id, user.username or "", full_name]

    def saveToSheet(self):
        """Prompt for a group, fetch its members and write them to the sheet.

        Every member becomes one row ``[id, username, full name]`` written
        to ``RANGE`` of the spreadsheet ``SPREADSHEET_ID``.

        Raises:
            IndexError: if no groups are loaded or the entered index is not
                one of the indices listed by ``getGroups()``.
            ValueError: if the entered choice is not an integer.
        """
        if not self.groups:
            raise IndexError("No groups loaded; call getGroups() first.")

        # Load the credentials before scraping so that a missing or invalid
        # key file fails fast, not after the member list has been fetched.
        cred = service_account.Credentials.from_service_account_file(
            self.SERVICE_ACCOUNT_FILE, scopes=self.SCOPES)

        g_index = input("Choose a channel to scrape members from: ")
        choice = int(g_index)
        # Reject negative numbers too: they would silently index from the
        # end of the list and select a group the user did not ask for.
        if not 0 <= choice < len(self.groups):
            raise IndexError(
                f"Choice {choice} is out of range "
                f"(0-{len(self.groups) - 1})."
            )
        target_group = self.groups[choice]

        print("Fetching members...")
        all_participants = self.client.get_participants(target_group)

        print("Saving to sheet...")
        rows = [self._member_row(user) for user in all_participants]

        service = build("sheets", "v4", credentials=cred)

        # Call the Sheets API and update the sheet
        sheet = service.spreadsheets()
        response = sheet.values().update(
            spreadsheetId=self.SPREADSHEET_ID,
            range=self.RANGE,
            valueInputOption="RAW",
            body={"values": rows},
        ).execute()
        print(response)

        print("**Members scraped successfully**")



if __name__ == '__main__':
    # Run the whole pipeline: log in, list the megagroups, then export the
    # members of the chosen group to the spreadsheet.
    telegram = Scraper()
    try:
        telegram.connect()
        telegram.getGroups()
        telegram.saveToSheet()
    finally:
        # Close the Telegram connection even if one of the steps fails.
        telegram.client.disconnect()

But when I try a larger group (3k to 6k members), it is only able to return about 90% of the members, and if a group has more than 6k members, it throws this error:

Traceback (most recent call last):
  File "d:crawlertelegramgroupmain2.py", line 149, in <module>
    telegram.saveToSheet()
  File "d:crawlertelegramgroupmain2.py", line 88, in saveToSheet
    all_participants = self.client.get_participants(target_channel)
  File "G:Program FilesPython310libsite-packagestelethonsync.py", line 39, in syncified
    return loop.run_until_complete(coro)
  File "G:Program FilesPython310libasynciobase_events.py", line 641, in run_until_complete
    return future.result()
  File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 507, in get_participants
    return await self.iter_participants(*args, **kwargs).collect()
  File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 113, in collect
    async for message in self:
  File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 74, in __anext__
    if await self._load_next_chunk():
  File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 224, in _load_next_chunk
    participants = results[i]
TypeError: 'ChannelParticipants' object is not subscriptable

I have also read the documentation for the get_participants method. It has a parameter aggressive=True, but I think it was deprecated; when it is used, the code does not work at all (even for small groups) and throws a FloodWaitError:

Traceback (most recent call last):
  File "d:crawlertelegramgroupmain2.py", line 149, in <module>
    telegram.saveToSheet()
  File "d:crawlertelegramgroupmain2.py", line 88, in saveToSheet
    all_participants = self.client.get_participants(target_group, aggressive = True)
  File "G:Program FilesPython310libsite-packagestelethonsync.py", line 39, in syncified
    return loop.run_until_complete(coro)
  File "G:Program FilesPython310libasynciobase_events.py", line 641, in run_until_complete
    return future.result()
  File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 507, in get_participants
    return await self.iter_participants(*args, **kwargs).collect()
  File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 113, in collect
    async for message in self:
  File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 74, in __anext__
    if await self._load_next_chunk():
  File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 222, in _load_next_chunk
    results = await self.client(self.requests)
  File "G:Program FilesPython310libsite-packagestelethonclientusers.py", line 30, in __call__
    return await self._call(self._sender, request, ordered=ordered)
  File "G:Program FilesPython310libsite-packagestelethonclientusers.py", line 80, in _call
    raise MultiError(exceptions, results, requests)
telethon.errors.common.MultiError: ([FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), None, FloodWaitError('A wait of 
31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), None, None, FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)')], [None, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9ECACE0>, None, None, None, None, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9EC9DE0>, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9EC9150>, None], [<telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECBBE0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECBCA0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9720>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA4D0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9C90>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9F00>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA020>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA110>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9A80>])

Can anyone provide me some suggestions or substitutions?

2

Answers


  1. The library's author suggests updating to the development version of the library:

    https://github.com/LonamiWebs/Telethon/issues/928#issuecomment-1036062092

    Login or Signup to reply.
  2. all_participants = client.get_participants(target_group)

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search