Lord G. Lord G. - 9 months ago 40
Python Question

Looping through AJAX pages with a while loop in Scrapy

I have code with a for loop that works perfectly fine. However, I'm struggling to implement the same thing with a while loop. It looks like I'm getting empty JSON objects. How can I get the 'while' loop working, bearing in mind that at some point the JSON response becomes {"data":[],"result":"ok"}?

My while loop

def after_login(self,response):
# NOTE(review): the indentation of this snippet was lost when it was pasted, so
# the nesting of the statements below cannot be recovered with certainty.
#
# Purpose: when the response body contains "smg", repeatedly yield search
# FormRequests with a growing 'o' value.
if "smg" in response.body:
#for i in range(0,100,10):
# minime starts above 1, so the `or minime > 1` clause is true on the first
# evaluation of the loop condition regardless of what parse_firstcall returns.
minime = 2
# i is sent as the 'o' form field and is increased by 10 further down.
i = 10
# The condition calls parse_firstcall on the `response` argument of
# after_login itself, not on the responses to the FormRequests yielded below;
# those requests name self.parse_firstcall as their callback instead.
# NOTE(review): since `response` never changes here, the length test presumably
# gives the same answer on every evaluation - confirm against parse_firstcall.
while len(self.parse_firstcall(response)['data']) > 1 or minime > 1:
# Debug output: length of the parsed data, minime, and the current 'o' value.
print('------------------------------------')
print(len(self.parse_firstcall(response)['data']))
print(str(minime))
print(str(i))
print('-------------------------------------')
yield FormRequest(
url='URL',
formdata={'act': 'serial', 'type': 'search', 'o': str(i), 's': '3','t': '0'},
callback=self.parse_firstcall
)
# Presumably inside the loop body: once minime is 0 the loop condition depends
# only on the length test above.
minime = 0
i += 10
# Pauses for 5 seconds.
# NOTE(review): a blocking sleep inside a Scrapy callback likely stalls the
# whole crawler rather than just this loop - confirm.
time.sleep(5)



def parse_firstcall(self, response):
    """Decode ``response.body`` as JSON, cache it on ``self.serialj`` and return it.

    Args:
        response: Object whose ``body`` attribute holds the JSON text.

    Returns:
        The decoded JSON value. If the body cannot be decoded, a placeholder
        payload ``{"data": ['why', 'always', 'me'], "result": "ok"}`` is
        stored and returned instead.
    """
    try:
        self.serialj = json.loads(response.body)
    except (TypeError, ValueError):
        # Only decoding failures fall back to the placeholder; anything else
        # (KeyboardInterrupt, programming errors) is no longer swallowed.
        # Note the placeholder has three items, so a caller testing
        # ``len(result['data']) > 1`` sees an undecodable body as "more data".
        self.serialj = {"data":['why', 'always', 'me'], "result": "ok"}
    return self.serialj

Answer Source

The solution I found: there's no need for a while loop here. Simply make one call and, in the callback, check the len() of the returned data to decide whether to request the next page.

def after_login(self, response):
    """Kick off the paginated search once the login response looks right.

    Yields a single FormRequest for the offset currently held in
    ``self.req``; its response is handed to ``self.parse_firstcall``.
    Nothing is yielded when the response body does not contain "smg".
    """
    if "smg" not in response.body:
        return

    search_form = {
        'act': 'serial',
        'type': 'search',
        'o': str(self.req),
        's': '3',
        't': '0',
    }
    yield FormRequest(
        url='url',
        formdata=search_form,
        callback=self.parse_firstcall,
    )


def parse_firstcall(self, response):
    """Store one page of search results and request the following page.

    The response body is decoded as JSON. Every entry of its ``'data'`` list
    is saved in ``self.series`` keyed by the entry's ``'title_orig'``, then
    ``self.req`` is advanced by 10 and a FormRequest for that new offset is
    yielded with this method as its callback.

    Pagination stops (nothing is yielded) when ``'data'`` is empty. A page
    holding a single entry is still stored; the previous ``> 1`` test
    silently dropped such a page.

    Raises:
        ValueError: if the body is not valid JSON.
        KeyError: if the payload has no ``'data'`` key or an entry has no
            ``'title_orig'`` key.
    """
    payload = json.loads(response.body)
    items = payload['data']
    if not items:
        # Empty page: the server has no more results, so stop requesting.
        return

    print('///////////////////////////////////////////')
    print('Request number: ' +str(self.req)+ ' been made')
    print('///////////////////////////////////////////')
    for item in items:
        self.series[item['title_orig']] = item

    # Results are stored and the offset advanced before the next request is
    # built, so state is consistent even if building the request fails.
    self.req += 10
    yield FormRequest(
        url='url',
        formdata={'act': 'serial', 'type': 'search',  'o': str(self.req), 's': '3','t': '0'},
        callback=self.parse_firstcall,
    )