I am using Telethon
to scrape the member list from a Telegram group chat and then save that data to a Google Sheets spreadsheet. This code works fine with small groups/channels (under ~2k members/subscribers).
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty
from google.oauth2 import service_account # import for google sheets API
from googleapiclient.discovery import build
# Google Sheets API scope requested for the service-account credentials.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
# Service-account key file; a relative path, so it is resolved against the
# current working directory.
SERVICE_ACCOUNT_FILE = 'key.json'
# Loaded at import time: importing this module reads the key file and raises
# if it is missing or invalid. Scraper.saveToSheet() builds its own
# credentials, so nothing else in this file reads this object.
cred = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
class Scraper:
    """Export the members of a Telegram megagroup to a Google Sheets range.

    Intended call order: ``connect()``, then ``getGroups()``, then
    ``saveToSheet()``. The last two are interactive: they print to stdout
    and read the user's choice from stdin.
    """

    def __init__(self):
        # Enter 7-digit Telegram API ID
        self.api_id = 1234567
        # Enter 32-character API Hash
        self.api_hash = "randomrandomrandom"
        # Enter mobile number with country code
        self.phone = "myphonenumber"
        # The phone number is also passed as the client's first argument
        # (the session name).
        self.client = TelegramClient(self.phone, self.api_id, self.api_hash)
        # Megagroups collected by getGroups(); saveToSheet() indexes into it.
        self.groups = []

    def connect(self):
        """Connect to Telegram, signing in interactively when required.

        If the user is not already authorized, an OTP code request is sent
        and the code the user received on their Telegram account is read
        from stdin. After logging in, a ``*.session`` file is created; it is
        a database file that makes the session persistent.
        """
        self.client.connect()
        if not self.client.is_user_authorized():
            self.client.send_code_request(self.phone)
            self.client.sign_in(self.phone, input("Enter OTP code: "))

    @staticmethod
    def _select_megagroups(chats):
        """Return the chats whose ``megagroup`` attribute is truthy.

        Chats without a ``megagroup`` attribute are skipped rather than
        raising, so only a missing attribute is tolerated here; any other
        error propagates.
        """
        return [chat for chat in chats if getattr(chat, "megagroup", False)]

    @staticmethod
    def _member_row(user):
        """Build one sheet row ``[id, username, full name]`` for *user*.

        Missing (falsy) username, first name and last name become empty
        strings; the full name is "first last" with outer whitespace
        stripped.
        """
        full_name = f"{user.first_name or ''} {user.last_name or ''}".strip()
        return [user.id, user.username or "", full_name]

    def getGroups(self):
        """Collect megagroups from the user's chat list and print a menu.

        A single page of at most ``chunk_size`` (10) dialogs is requested.
        offset_date and offset_peer are used for filtering the chats and are
        sent empty; offset_id and limit are used for pagination. Matching
        chats are appended to ``self.groups`` (repeated calls accumulate),
        and every entry of ``self.groups`` is printed as ``<index>- <title>``
        so the user can choose one in ``saveToSheet()``.
        """
        chunk_size = 10
        result = self.client(GetDialogsRequest(
            offset_date=None,
            offset_id=0,
            offset_peer=InputPeerEmpty(),
            limit=chunk_size,
            hash=0,
        ))
        self.groups.extend(self._select_megagroups(result.chats))

        # Choose which group to scrape members from
        for index, group in enumerate(self.groups):
            print(f"{index}- {group.title}")

    def saveToSheet(self):
        """Prompt for a group, fetch its members and write them to the sheet.

        One row ``[user id, username, full name]`` is written per member to
        the range ``Sheet1!A2:C`` of the configured spreadsheet, using the
        ``RAW`` value input option. The API response is printed.

        Raises:
            ValueError: if the entered choice is not an integer.
            IndexError: if the entered index is outside ``self.groups``.
        """
        scopes = ["https://www.googleapis.com/auth/spreadsheets"]
        key_file = "key.json"
        cred = service_account.Credentials.from_service_account_file(
            key_file, scopes=scopes)

        g_index = input("Choose a channel to scrape members from: ")
        target_group = self.groups[int(g_index)]

        print("Fetching members...")
        # NOTE(review): reportedly this call returned incomplete lists or
        # failed on groups with several thousand members under the Telethon
        # version in use; upgrading Telethon was the suggested remedy.
        # Confirm before relying on it for large groups.
        all_participants = self.client.get_participants(target_group)

        print("Saving to sheet...")
        rows = [self._member_row(user) for user in all_participants]

        # The ID and range of target spreadsheet.
        spreadsheet_id = "<MY-SHEET-ID>"  # epicwar group
        sheet_range = "Sheet1!A2:C"

        service = build("sheets", "v4", credentials=cred)
        # Call the Sheets API
        sheet = service.spreadsheets()
        # update sheet
        response = sheet.values().update(
            spreadsheetId=spreadsheet_id,
            range=sheet_range,
            valueInputOption="RAW",
            body={"values": rows},
        ).execute()
        print(response)
        print("**Members scraped successfully**")
if __name__ == '__main__':
    # Interactive flow: log in, list the user's megagroups, then export the
    # members of the chosen one to the spreadsheet.
    telegram = Scraper()
    try:
        telegram.connect()
        telegram.getGroups()
        telegram.saveToSheet()
    finally:
        # Close the Telegram connection even when one of the steps fails.
        telegram.client.disconnect()
But when I try a larger group (from 3k to 6k members), it is only able to return 90% of the members, and if a group has more than 6k members, it throws this error:
Traceback (most recent call last):
File "d:crawlertelegramgroupmain2.py", line 149, in <module>
telegram.saveToSheet()
File "d:crawlertelegramgroupmain2.py", line 88, in saveToSheet
all_participants = self.client.get_participants(target_channel)
File "G:Program FilesPython310libsite-packagestelethonsync.py", line 39, in syncified
return loop.run_until_complete(coro)
File "G:Program FilesPython310libasynciobase_events.py", line 641, in run_until_complete
return future.result()
File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 507, in get_participants
return await self.iter_participants(*args, **kwargs).collect()
File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 113, in collect
async for message in self:
File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 74, in __anext__
if await self._load_next_chunk():
File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 224, in _load_next_chunk
participants = results[i]
TypeError: 'ChannelParticipants' object is not subscriptable
I have also read the documentation for the get_participants method. It has the parameter aggressive=True,
but I think it was deprecated, and when it is used the code does not work at all (even for small groups) and throws FloodWaitError:
Traceback (most recent call last):
File "d:crawlertelegramgroupmain2.py", line 149, in <module>
telegram.saveToSheet()
File "d:crawlertelegramgroupmain2.py", line 88, in saveToSheet
all_participants = self.client.get_participants(target_group, aggressive = True)
File "G:Program FilesPython310libsite-packagestelethonsync.py", line 39, in syncified
return loop.run_until_complete(coro)
File "G:Program FilesPython310libasynciobase_events.py", line 641, in run_until_complete
return future.result()
File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 507, in get_participants
return await self.iter_participants(*args, **kwargs).collect()
File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 113, in collect
async for message in self:
File "G:Program FilesPython310libsite-packagestelethonrequestiter.py", line 74, in __anext__
if await self._load_next_chunk():
File "G:Program FilesPython310libsite-packagestelethonclientchats.py", line 222, in _load_next_chunk
results = await self.client(self.requests)
File "G:Program FilesPython310libsite-packagestelethonclientusers.py", line 30, in __call__
return await self._call(self._sender, request, ordered=ordered)
File "G:Program FilesPython310libsite-packagestelethonclientusers.py", line 80, in _call
raise MultiError(exceptions, results, requests)
telethon.errors.common.MultiError: ([FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), None, FloodWaitError('A wait of
31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)'), None, None, FloodWaitError('A wait of 31 seconds is required (caused by GetParticipantsRequest)')], [None, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9ECACE0>, None, None, None, None, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9EC9DE0>, <telethon.tl.types.channels.ChannelParticipants object at 0x0000018CD9EC9150>, None], [<telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECBBE0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECBCA0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9720>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA4D0>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9C90>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9F00>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA020>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9ECA110>, <telethon.tl.functions.channels.GetParticipantsRequest object at 0x0000018CD9EC9A80>])
Can anyone suggest a fix or an alternative approach?
2
Answers
The library's author suggests updating to the development version of the library (see the linked comment):
https://github.com/LonamiWebs/Telethon/issues/928#issuecomment-1036062092
all_participants = client.get_participants(target_group)