skip to Main Content

I am not too sure what I am doing wrong. I am trying to parse specific contents out of a block of JavaScript.

This is the output of “s” (for the code below it):

<script type="text/javascript">window._sharedData = {"activity_counts":{"comment_likes":0,"comments":0,"likes":0,"relationships":0,"usertags":0},"config":{"csrf_token":"OIXAF5a6FwMQJj3vCaUQXCGUGL3sFb0Z","viewer":{"allow_contacts_sync":false,"biography":"Follow for the best social media experience. Est. 2014","external_url":null,"full_name":"Social Media Bliztexnetwork","has_profile_pic":true,"id":"6440587166","profile_pic_url":"https://instagram.fbed1-1.fna.fbcdn.net/vp/dd5d8db8ca1645ac8b69fdaf8886184f/5BB11538/t51.2885-19/s150x150/32947488_229940584435561_2806247690365566976_n.jpg","profile_pic_url_hd":"https://instagram.fbed1-1.fna.fbcdn.net/vp/df4d5098687fe594c5b2d9750804941a/5BEC5FC8/t51.2885-19/s320x320/32947488_229940584435561_2806247690365566976_n.jpg","username":"bliztezxxmedia"}},"supports_es6":false,"country_code":"US","language_code":"en","locale":"en_US","entry_data":{"ProfilePage":[{"logging_page_id":"profilePage_7507466602","show_suggested_profiles":false,"graphql":{"user":{"biography":"What a wonderful day!!!","blocked_by_viewer":false,"country_block":false,"external_url":null,"external_url_linkshimmed":null,"edge_followed_by":{"count":17},"followed_by_viewer":true,"edge_follow":{"count":8},"follows_viewer":false,"full_name":"Verna 
Manning","has_channel":false,"has_blocked_viewer":false,"highlight_reel_count":0,"has_requested_viewer":false,"id":"7507466602","is_private":true,"is_verified":false,"mutual_followers":{"additional_count":-3,"usernames":[]},"profile_pic_url":"https://instagram.fbed1-1.fna.fbcdn.net/vp/96e65311d0a5e79729411bd582592816/5BCC9C5A/t51.2885-19/s150x150/33143922_237271910362316_6290555001760645120_n.jpg","profile_pic_url_hd":"https://instagram.fbed1-1.fna.fbcdn.net/vp/96e65311d0a5e79729411bd582592816/5BCC9C5A/t51.2885-19/s150x150/33143922_237271910362316_6290555001760645120_n.jpg","requested_by_viewer":false,"username":"vernamanning46464","connected_fb_page":null,"edge_felix_combined_post_uploads":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_felix_combined_draft_uploads":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_felix_video_timeline":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_felix_drafts":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_felix_pending_post_uploads":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_felix_pending_draft_uploads":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_owner_to_timeline_media":{"count":2,"page_info":{"has_next_page":false,"end_cursor":"AQAQt_06KHhticevO8Am12l3GJ1CdrZVdUztIDyZN7oXm_IVmr2Clwi844aWh9oe9TU"},"edges":[{"node":{"__typename":"GraphImage","id":"1810494542282448836","edge_media_to_caption":{"edges":[{"node":{"text":"What a sunny 
day!"}}]},"shortcode":"BkgKzGch1_EsxkqWK-4ZjG_XoWfrFxgXIOrZqs0","edge_media_to_comment":{"count":24},"comments_disabled":false,"taken_at_timestamp":1530047789,"dimensions":{"height":1080,"width":1080},"display_url":"https://instagram.fbed1-1.fna.fbcdn.net/vp/d82d797684ce57fef7a9fe87c74d2342/5BCE0CF2/t51.2885-15/s1080x1080/e15/fr/35274418_207295373248007_2552664476088270848_n.jpg","edge_liked_by":{"count":0},"edge_media_preview_like":{"count":0},"gating_info":null,"media_preview":"ACoqnuL0Qj5cMT0Gf1rBdi5LHqasXLK8hZBtB7fz/WoMV1xjZGLZHijFSbaNtXYVwhgaZtq/n2H1q9/Zn/TRfyNVAxUYHGetNyfU1LT6DuXnsnTn09f6VX21tERy/wAZCjt3P41BNaqMGLkHj1pRl0luJrqjM20m2rrwGM7TjPtzTobYynGQB3J7VpdWv0I8ihtpNtdGkdvGnlvtb1PqaZstPQfmf8az9ouzL5X3RmLMGPIxgdulSiYDgZAPNVQOB/nvSjmvP9pLa5fKibzlPTmjzlH0qMKB27H+ZqOn7SXcXKiwZFAzTPNHpVVSc1ISaPaz7i5Uf//Z","owner":{"id":"7507466602"},"thumbnail_src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/7cecb59edaba9f9f7565604eac28d8df/5BC63210/t51.2885-15/s640x640/sh0.08/e35/35274418_207295373248007_2552664476088270848_n.jpg","thumbnail_resources":[{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/b499ce5fafa113fe57f7325d86628900/5BE96296/t51.2885-15/s150x150/e15/35274418_207295373248007_2552664476088270848_n.jpg","config_width":150,"config_height":150},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/f124ca8254e24569515be5f3f99ff911/5BE9A3A9/t51.2885-15/s240x240/e15/35274418_207295373248007_2552664476088270848_n.jpg","config_width":240,"config_height":240},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/5c82e7c2ae3905863fe25150fca1f5e4/5BCB4ED1/t51.2885-15/s320x320/e15/35274418_207295373248007_2552664476088270848_n.jpg","config_width":320,"config_height":320},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/e76685c6614c444d8ed5f04efc01435a/5BB6D257/t51.2885-15/s480x480/e15/35274418_207295373248007_2552664476088270848_n.jpg","config_width":480,"config_height":480},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/7cecb59edaba9f9f7565604eac28d8df/5BC63210/t51.2885-15
/s640x640/sh0.08/e35/35274418_207295373248007_2552664476088270848_n.jpg","config_width":640,"config_height":640}],"is_video":false}},{"node":{"__typename":"GraphImage","id":"1757529388200541080","edge_media_to_caption":{"edges":[{"node":{"text":"What a nice day."}}]},"shortcode":"Bhj_6qyALuYgmy2sPgmUtoBcmcxZWGeyLkM3O00","edge_media_to_comment":{"count":3},"comments_disabled":false,"taken_at_timestamp":1523733851,"dimensions":{"height":1080,"width":1080},"display_url":"https://instagram.fbed1-1.fna.fbcdn.net/vp/16610d58bb6cc90893ffd264f81755c6/5BAD1DBC/t51.2885-15/s1080x1080/e15/fr/30590929_101347367387069_7153309976138612736_n.jpg","edge_liked_by":{"count":1},"edge_media_preview_like":{"count":1},"gating_info":null,"media_preview":"ACoqwgadupijNLn0oAdvNPErDoahopiLkdw6nIPNTfaX9apxDNS7fencLDIAP1qFsZ4qYAx9CDmmMmD1B/GsxkRoqXbnjjP1FN2H2/MVVwHKcVJvqLYf8ml2tSAtbce/vjFIFYeh/Sn0Uhjh7gU8Njjt+dMFLQA4Y9B+HFO+X0/z+VMpaAP/2Q==","owner":{"id":"7507466602"},"thumbnail_src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/a4dfd1c28505301d4c440c95023fbbc7/5BC81D5E/t51.2885-15/s640x640/sh0.08/e35/30590929_101347367387069_7153309976138612736_n.jpg","thumbnail_resources":[{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/d69525a20a2e61b2ee8663daf287a8ee/5BB46AD8/t51.2885-15/s150x150/e15/30590929_101347367387069_7153309976138612736_n.jpg","config_width":150,"config_height":150},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/4034b1752e3a4bace405aadd5a35477c/5BCB79E7/t51.2885-15/s240x240/e15/30590929_101347367387069_7153309976138612736_n.jpg","config_width":240,"config_height":240},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/96b684f38cb826f3efd8a7610ed6e9bb/5BEB899F/t51.2885-15/s320x320/e15/30590929_101347367387069_7153309976138612736_n.jpg","config_width":320,"config_height":320},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/51dac35520bc90d7c7253cc331acf561/5BB44919/t51.2885-15/s480x480/e15/30590929_101347367387069_7153309976138612736_n.jpg","config_width":480,"config_height":
480},{"src":"https://instagram.fbed1-1.fna.fbcdn.net/vp/a4dfd1c28505301d4c440c95023fbbc7/5BC81D5E/t51.2885-15/s640x640/sh0.08/e35/30590929_101347367387069_7153309976138612736_n.jpg","config_width":640,"config_height":640}],"is_video":false}}]},"edge_saved_media":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_media_collections":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]}}},"felix_onboarding_video_resources":{"mp4":"/static/videos/felix-onboarding/onboardingVideo.mp4/9d16838ca7f9.mp4","poster":"/static/images/felix-onboarding/onboardingVideoPoster.png/8fdba7cf2120.png"}}]},"gatekeepers":{"ld":true,"rt":true,"sw":true,"vl":true,"seo":true,"seoht":true,"2fac":true,"sf":true,"saa":true,"ai":true},"knobs":{"acct:ntb":0,"cb":0,"captcha":0},"qe":{"dash_for_vod":{"g":"","p":{}},"aysf":{"g":"","p":{}},"bc3l":{"g":"","p":{}},"comment_reporting":{"g":"","p":{}},"direct_conversation_reporting":{"g":"","p":{}},"direct_reporting":{"g":"","p":{}},"reporting":{"g":"","p":{}},"media_reporting":{"g":"","p":{}},"acc_recovery_link":{"g":"","p":{}},"notif":{"g":"","p":{}},"drct_nav":{"g":"","p":{}},"fb_unlink":{"g":"","p":{}},"mobile_stories_doodling":{"g":"","p":{}},"move_comment_input_to_top":{"g":"","p":{}},"mobile_cancel":{"g":"","p":{}},"mobile_search_redesign":{"g":"","p":{}},"show_copy_link":{"g":"control","p":{"show_copy_link_option":"false"}},"mobile_logout":{"g":"","p":{}},"pl_pivot_li":{"g":"control_0423","p":{"show_pivot":"false"}},"pl_pivot_lo":{"g":"","p":{}},"404_as_react":{"g":"","p":{}},"acc_recovery":{"g":"test_with_prefill","p":{"has_prefill":"true"}},"collections":{"g":"","p":{}},"comment_ta":{"g":"","p":{}},"connections":{"g":"control","p":{"has_suggestion_context_in_feed":"false"}},"disc_ppl":{"g":"control_02_27","p":{"has_follow_all_button":"false","has_pagination":"false"}},"embeds":{"g":"","p":{}},"ebdsim_li":{"g":"control_shadow_0322","p":{"is_shadow_enabled":"false","use_new_ui":"true"}},"ebds
im_lo":{"g":"","p":{}},"empty_feed":{"g":"","p":{}},"bundles":{"g":"","p":{}},"exit_story_creation":{"g":"","p":{}},"gdpr_logged_out":{"g":"","p":{}},"appsell":{"g":"","p":{}},"imgopt":{"g":"control","p":{}},"follow_button":{"g":"test","p":{"is_inline":"true"}},"loggedout":{"g":"","p":{}},"loggedout_upsell":{"g":"test_with_new_loggedout_upsell_content_03_15_18","p":{"has_new_loggedout_upsell_content":"true"}},"us_li":{"g":"Test","p":{"show_related_media":"true"}},"msisdn":{"g":"","p":{}},"bg_sync":{"g":"","p":{}},"onetaplogin":{"g":"default_opt_in","p":{"default_value":"true","during_reg":"true","storage_version":"one_tap_storage_version"}},"onetaplogin_userbased":{"g":"","p":{}},"login_poe":{"g":"","p":{}},"prvcy_tggl":{"g":"","p":{}},"private_lo":{"g":"","p":{}},"profile_photo_nux_fbc_v2":{"g":"launch","p":{"prefill_photo":"true","skip_nux":"false"}},"profile_tabs":{"g":"","p":{}},"push_notifications":{"g":"","p":{}},"reg":{"g":"control_01_10","p":{"has_new_landing_appsells":"false","has_new_landing_page":"false"}},"reg_vp":{"g":"","p":{}},"feed_vp":{"g":"launch","p":{"is_hidden":"true"}},"report_haf":{"g":"","p":{}},"report_media":{"g":"","p":{}},"report_profile":{"g":"test","p":{"is_enabled":"true"}},"save":{"g":"test","p":{"is_enabled":"true"}},"sidecar":{"g":"","p":{}},"sidecar_swipe":{"g":"","p":{}},"su_universe":{"g":"test_login_autocomplete","p":{"use_autocomplete_signup":"true"}},"stale":{"g":"","p":{}},"stories_lo":{"g":"test_03_15","p":{"stories_profile":"true"}},"stories":{"g":"","p":{}},"tp_pblshr":{"g":"","p":{}},"video":{"g":"","p":{}},"gdpr_settings":{"g":"","p":{}},"gdpr_blocking_logout":{"g":"","p":{}},"gdpr_eu_tos":{"g":"","p":{}},"gdpr_row_tos":{"g":"test_05_01","p":{"tos_version":"row"}},"fd_gr":{"g":"control","p":{"show_post_back_button":"false"}},"felix":{"g":"test","p":{"is_enabled":"true"}},"felix_clear_fb_cookie":{"g":"control","p":{"is_enabled":"true","blacklist":"fbsr_124024574287414"}},"felix_creation_duration_limits":{"g":"dogfooding",
"p":{"minimum_length_seconds":"15","maximum_length_seconds":"600"}},"felix_creation_enabled":{"g":"","p":{"is_enabled":"true"}},"felix_creation_fb_crossposting":{"g":"control","p":{"is_enabled":"false"}},"felix_creation_fb_crossposting_v2":{"g":"control","p":{"is_enabled":"true"}},"felix_creation_validation":{"g":"control","p":{"edit_video_controls":"true"}},"felix_creation_video_upload":{"g":"","p":{}},"felix_early_onboarding":{"g":"","p":{}},"pride":{"g":"test","p":{"enabled":"true","hashtag_whitelist":"lgbt,lesbian,gay,bisexual,transgender,trans,queer,lgbtq,girlslikeus,girlswholikegirls,instagay,pride,gaypride,loveislove,pansexual,lovewins,transequalitynow,lesbiansofinstagram,asexual,nonbinary,lgbtpride,lgbta,lgbti,queerfashion,queers,queerpride,queerlife,marriageequality,pride2018,genderqueer,bi,genderfluid,lgbtqqia,comingout,intersex,transman,transwoman,twospirit,transvisibility,queerart,dragqueen,dragking,dragartist,twomoms,twodads,lesbianmoms,gaydads,gendernonconforming"}},"unfollow_confirm":{"g":"","p":{}},"profile_enhance_li":{"g":"control","p":{"has_tagged":"false"}},"profile_enhance_lo":{"g":"control","p":{"has_tagged":"false"}},"create_tag":{"g":"","p":{}}},"hostname":"www.instagram.com","platform":"ios","rhx_gis":"87a25368813608d393baaa28a0d6afb7","nonce":"zsP4NjzdJRIWmer6K5At1A==","zero_data":{},"rollout_hash":"5f72737283f8","bundle_variant":"base","probably_has_app":false,"show_app_install":true};</script>

And this is the code I am trying to execute.

# Stringify the fourth <script type="text/javascript"> tag found by BeautifulSoup.
s = str(soup.find_all("script", type="text/javascript")[3])
# Capture everything between "window._sharedData = " and "</script>".
# NOTE(review): in the output shown above the tag ends with "};</script>", so
# this capture also contains the ";" that sits just before "</script>".
m = re.search(r"(?<=window._sharedData = )(?P<json>.*)(?=</script>)", s)

if m:
    # Decode the captured text as JSON and dump the whole structure.
    data = json.loads(m.group('json'))
    print(data)
    # Walk every profile page entry and print the id of each timeline media node.
    for i in data['entry_data']["ProfilePage"]:
        for j in i['graphql']['user']['edge_owner_to_timeline_media']['edges']:
            print(j['node']["id"])

Upon running this, I am prompted with the following error:

json.decoder.JSONDecodeError: Extra data: line 1 column 12215 (char 12214)

I am completely lost and have no idea where I am going wrong. All help is appreciated, and thanks in advance to all of those who contribute!

2

Answers


  1. There’s not quite enough here to debug: what you give for s doesn’t include the </script>, so the pattern never matches when I run it locally. However, when I append it, it seems to work correctly.

    From the error it is clear that the contents of m.group('json') are not actually a valid JSON string, so I suspect you need to work on your regular expression. Try printing out the value of m.group('json') (before attempting to parse it) and feeding that into a JSON validator such as https://jsonlint.com/, which will direct you to where the error lies. Perhaps that line terminates with a ; that you need to strip out, or there is some other issue.

    Login or Signup to reply.
  2. I think you could update your regex to match the JSON without the semicolon at the end by adding the semicolon to the positive lookahead, (?=;</script>):

    (?<=window._sharedData = )(?P<json>.*)(?=;</script>)

    Your code might look like this without the [3] in the first line for your given example:

    # Collect every <script type="text/javascript"> tag. There is no [3] index,
    # so the code does not depend on where the tag sits in the page.
    scripts = soup.find_all("script", type="text/javascript")
    # The lookahead includes the ";" so the trailing semicolon of the
    # JavaScript assignment is left out of the captured JSON text.
    pattern = re.compile(r"(?<=window._sharedData = )(?P<json>.*)(?=;</script>)")
    # Search each tag on its own instead of str() of the whole result list.
    # With all tags in one string, the greedy ".*" can run on into a later tag
    # that also ends in ";</script>", which brings back the "Extra data" error.
    m = next((hit for hit in (pattern.search(str(tag)) for tag in scripts) if hit), None)
    if m:
        # The captured group is now the bare JSON object literal.
        data = json.loads(m.group('json'))
        # Print the id of every timeline media node on each profile page entry.
        for i in data['entry_data']["ProfilePage"]:
            for j in i['graphql']['user']['edge_owner_to_timeline_media']['edges']:
                print(j['node']["id"])
    
    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search