#!/usr/bin/python
"""Download every record of one Harvard LibraryCloud series to a local file.

The script first issues a zero-record query to learn how many records
match ``series``, then pages through the result set ``maxLimit`` records
at a time, appending each response body to ``fnAll``.

The code runs under both Python 2 and Python 3.  pycurl is required for
the downloads themselves.
"""
import io
import json
import os
import sys

try:
    from StringIO import StringIO  # Python 2 location
except ImportError:
    from io import StringIO  # Python 3 location

try:
    import pycurl
except ImportError:
    # Reported by main() when a download is actually attempted, so the
    # URL and paging helpers stay importable without libcurl installed.
    pycurl = None


# ---- settings ------------------------------------------------------------
# The count query asks for zero records: only its pagination block is read.
limit_count = 0
# Format of the downloaded records (dc, xml, json).
fileExtention = "json"
# The count response is parsed with the json module, so it is always
# requested as JSON regardless of the record format chosen above.
count_format = "json"
fileNameStem = "LatinAmColl"
howMany = "_COUNT."
allPlease = "_ALL."
fnCount = fileNameStem + howMany + count_format
fnAll = fileNameStem + allPlease + fileExtention
# NOTE(review): confirm against the LibraryCloud documentation that the
# service accepts a limit this large; if it silently caps the page size
# below maxLimit, paging in steps of maxLimit would skip records.
maxLimit = 255
# NOTE(review): confirm whether the API's "start" offset is 0- or 1-based;
# with a 0-based offset a value of 1 skips the first record.
starting_record = 1
series = ("Latin%20American%20pamphlet%20digital%20project"
          "%20at%20Harvard%20University")
# Alternative series:
# series = "Harvard%20College%20Library%20preservation%20digitization%20pro*"
libcloud_api_stem = "http://api.lib.harvard.edu/v2/items."


def build_url(limit, start, extension=None):
    """Return the items query URL for one page of ``series``.

    limit     -- number of records requested in this call
    start     -- offset of the first record requested
    extension -- response format; defaults to ``fileExtention``
    """
    if extension is None:
        extension = fileExtention
    # ``series`` is already percent-encoded, so it is inserted verbatim.
    return "{stem}{ext}?limit={limit}&start={start}&title={title}".format(
        stem=libcloud_api_stem,
        ext=extension,
        limit=limit,
        start=start,
        title=series,
    )


def page_starts(total, page_size, first=1):
    """Return the start offset of every page needed to cover ``total`` records.

    Offsets begin at ``first`` and advance by ``page_size``; a final partial
    page is included, and an empty list is returned when ``total`` is 0.
    Raises ValueError when ``page_size`` is not positive.
    """
    if page_size <= 0:
        raise ValueError("page_size must be positive")
    return list(range(first, first + total, page_size))


def read_record_count(path):
    """Return ``pagination.numFound`` from the JSON count response at ``path``.

    Raises ValueError when the response does not have that field.
    """
    with io.open(path, encoding="utf-8") as data_file:
        data = json.load(data_file)
    try:
        return int(data["pagination"]["numFound"])
    except (KeyError, TypeError, ValueError):
        raise ValueError("no pagination.numFound value in " + path)


def fetch(curl, url, out_file):
    """GET ``url`` with ``curl``, writing the response body to ``out_file``.

    ``out_file`` must be opened in binary mode so the body is written
    without decoding on both Python 2 and Python 3.
    """
    curl.setopt(curl.URL, url)
    curl.setopt(curl.WRITEDATA, out_file)
    curl.perform()


def main():
    """Fetch the record count, then download and append every page."""
    if pycurl is None:
        raise ImportError("pycurl is required to download records")

    print(fnAll)
    # Pages are appended below, so start from an empty output file.
    if os.path.isfile(fnAll):
        os.remove(fnAll)

    curl = pycurl.Curl()
    try:
        # Fail on HTTP errors instead of saving an error page as data.
        curl.setopt(curl.FAILONERROR, True)

        with open(fnCount, "wb") as count_file:
            count_url = build_url(limit_count, starting_record, count_format)
            fetch(curl, count_url, count_file)
        rec_count = read_record_count(fnCount)

        full_passes, remainder = divmod(rec_count, maxLimit)
        print("Limit Count = " + str(maxLimit))
        print(str(full_passes))
        print(str(remainder))

        with open(fnAll, "ab") as all_file:
            # The URL is rebuilt for every page; each response body is
            # appended to the output file as it arrives.
            for start in page_starts(rec_count, maxLimit, starting_record):
                print("Starting record = " + str(start))
                fetch(curl, build_url(maxLimit, start), all_file)
    finally:
        curl.close()


if __name__ == "__main__":
    sys.exit(main())
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question