[Checkins] SVN: Sandbox/luciano/kirbi/src/kirbi/fetch/ fetch refactoring
Luciano Ramalho
luciano at ramalho.org
Mon Jul 30 21:10:03 EDT 2007
Log message for revision 78494:
fetch refactoring
Changed:
U Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_fetch.py
U Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_parse.py
A Sandbox/luciano/kirbi/src/kirbi/fetch/invalid-request.xml
-=-
Modified: Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_fetch.py
===================================================================
--- Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_fetch.py 2007-07-31 01:08:45 UTC (rev 78493)
+++ Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_fetch.py 2007-07-31 01:10:02 UTC (rev 78494)
@@ -1,20 +1,105 @@
#!/usr/bin/env python
+# encoding: utf-8
+try:
+ from lxml import etree
+except ImportError:
+ try:
+ import cElementTree as etree
+ except ImportError:
+ try:
+ import elementtree.ElementTree as etree
+ except ImportError:
+ raise ImportError, "Failed to import ElementTree from any known place"
+
import httplib2
from urllib import quote
from StringIO import StringIO
from time import sleep
+
"""
-NOTE: 0333647289 is a valid ISBN which generates a AWS.InvalidParameterValue
- from Amazon.com with message: "0333647289 is not a valid value for ItemId"
- The book is Virtual History: Alternatives and Counterfactuals
- by Niall Ferguson (Editor)
- Amazon.com does not have it but Amazon.co.uk does and
- Google query "isbn 0333647289" also found it here:
- http://www.alibris.com/search/search.cfm?qwork=7055972
+Structure of the AmazonECS XML response:
+
+ItemLookupResponse
+ OperationRequest
+ (...)
+ Items
+ Request
+ IsValid
+ ItemLookupRequest
+ ItemId
+ ResponseGroup
+ (Errors)
+ (Error)
+ (Code)
+ (Message)
+ (Item)
+ (ItemAttributes)
+ (Author)
+ (Creator Role=...)
+
+Notes:
+- Errors element occurs when ISBN is non-existent;
+ in that case, Code contains the string "AWS.InvalidParameterValue"
+- Author element is not always present
+- Author element may be duplicated with the same content,
+ except for whitespace; for example: ISBN=0141000511
"""
+FIELD_MAP = [
+ # Book schema -> Amazon ECS element
+ ('title', 'Title'),
+ ('isbn13', 'EAN'),
+ ('edition', 'Edition'),
+ ('publisher', 'Publisher'),
+ ('issued', 'PublicationDate'),
+ ]
+
+CREATOR_TAGS = ['Author', 'Creator']
+
+AMAZON_INVALID_PARAM = 'AWS.InvalidParameterValue'
+
+
+def nsPath(ns, path):
+ parts = path.split('/')
+ return '/'.join([ns+part for part in parts])
+
+def parse(xml):
+ tree = etree.parse(xml)
+ raiz = tree.getroot()
+ # get the XML namespace from the root tag
+ ns = raiz.tag.split('}')[0] + '}'
+ request = raiz.find(nsPath(ns,'Items/Request'))
+ error_code = request.findtext(nsPath(ns,'Errors/Error/Code'))
+ if error_code is None:
+ items = raiz.findall(nsPath(ns,'Items/Item'))
+ #TODO: treat multiple Item elements in Items
+ item = items[0].find(ns+'ItemAttributes')
+ book_dic = {}
+ for field, tag in FIELD_MAP:
+ elem = item.find(ns+tag)
+ if elem is not None:
+ book_dic[field] = elem.text
+ creators = []
+ for tag in CREATOR_TAGS:
+ for elem in item.findall(ns+tag):
+ if elem is None: continue
+ role = elem.attrib.get('Role')
+ if role:
+ creator = '%s (%s)' % (elem.text, role)
+ else:
+ creator = elem.text
+ creators.append(creator)
+ if creators:
+ book_dic['creators'] = creators
+ return book_dic
+
+ elif error_code == AMAZON_INVALID_PARAM:
+ return None
+ else:
+ raise LookupError, error_code
+
class AmazonECS(object):
base_url = """http://ecs.amazonaws.com/onca/xml"""
@@ -25,39 +110,52 @@
if AssociateTag:
self.base_params['AssociateTag'] = AssociateTag
self.httpcli = httplib2.Http('.cache')
-
+
def buildURL(self, **kw):
query = []
kw.update(self.base_params)
for key, val in kw.items():
query.append('%s=%s' % (key,quote(val)))
return self.base_url + '?' + '&'.join(query)
-
+
def getFile(self, url):
# Amazon.com ECS agreement imposes a limit of one request per second
sleep(1)
resp, content = self.httpcli.request(url, 'GET')
- self.tree = etree.parse(StringIO(content))
return resp, content
-
+
def itemLookup(self,itemId,response='ItemAttributes'):
- params = { 'Operation':'ItemLookup',
+ params = { 'Operation':'ItemLookup',
'ItemId':itemId,
'ResponseGroup':response
}
url = self.buildURL(**params)
return self.getFile(url)[1]
-
-if __name__=='__main__':
+
+if __name__ == '__main__':
+ import sys
+ from pprint import pprint
+ xml = file(sys.argv[1])
+ dic = parse(xml)
+ pprint(dic)
+
from amazon_config import ACCESS_KEY_ID, ASSOCIATE_TAG
-
+
ecs = AmazonECS(ACCESS_KEY_ID, ASSOCIATE_TAG)
alice = '0393048470'
gof = '0201633612'
awpr = '0977616630'
oss = '1565925823'
dup = '0141000511'
- print ecs.itemLookup(oss)
-
-
-
+ erro = '1231231239'
+ print ecs.itemLookup(erro)
+
+"""
+NOTE: 0333647289 is a valid ISBN which generates a AWS.InvalidParameterValue
+ from Amazon.com with message: "0333647289 is not a valid value for ItemId"
+ The book is Virtual History: Alternatives and Counterfactuals
+ by Niall Ferguson (Editor)
+ Amazon.com does not have it but Amazon.co.uk does and
+ Google query "isbn 0333647289" also found it here:
+ http://www.alibris.com/search/search.cfm?qwork=7055972
+"""
Modified: Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_parse.py
===================================================================
--- Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_parse.py 2007-07-31 01:08:45 UTC (rev 78493)
+++ Sandbox/luciano/kirbi/src/kirbi/fetch/amazon_parse.py 2007-07-31 01:10:02 UTC (rev 78494)
@@ -1,75 +1,103 @@
#!/usr/bin/env python
+# encoding: utf-8
-import httplib2
-from urllib import quote
-from lxml import etree
-from StringIO import StringIO
-from time import sleep
+"""
+Structure of the AmazonECS XML response:
+ItemLookupResponse
+ OperationRequest
+ (...)
+ Items
+ Request
+ IsValid
+ ItemLookupRequest
+ ItemId
+ ResponseGroup
+ (Errors)
+ (Error)
+ (Code)
+ (Message)
+ (Item)
+ (ItemAttributes)
+ (Author)
+ (Creator Role=...)
+
+Notes:
+- Errors element occurs when ISBN is non-existent;
+ in that case, Code contains the string "AWS.InvalidParameterValue"
+- Author element is not always present
+- Author element may be duplicated with the same content,
+ except for whitespace; for example: ISBN=0141000511
"""
-NOTE: 0333647289 is a valid ISBN which generates a AWS.InvalidParameterValue
- from Amazon.com with message: "0333647289 is not a valid value for ItemId"
- The book is Virtual History: Alternatives and Counterfactuals
- by Niall Ferguson (Editor)
- Amazon.com does not have it but Amazon.co.uk does and
- Google query "isbn 0333647289" also found it here:
- http://www.alibris.com/search/search.cfm?qwork=7055972
-"""
-class AmazonECS(object):
+try:
+ from lxml import etree
+except ImportError:
+ try:
+ import cElementTree as etree
+ except ImportError:
+ try:
+ import elementtree.ElementTree as etree
+ except ImportError:
+ print "Failed to import ElementTree from any known place"
- xml_namespace = """http://webservices.amazon.com/AWSECommerceService/2005-10-05"""
- base_url = """http://ecs.amazonaws.com/onca/xml"""
+FIELD_MAP = [
+ # Book schema -> Amazon ECS element
+ ('title', 'Title'),
+ ('isbn13', 'EAN'),
+ ('edition', 'Edition'),
+ ('publisher', 'Publisher'),
+ ('issued', 'PublicationDate'),
+ ]
- def __init__(self, AWSAccessKeyId, AssociateTag=None):
- self.base_params = { 'Service':'AWSECommerceService',
- 'AWSAccessKeyId':AWSAccessKeyId, }
- if AssociateTag:
- self.base_params['AssociateTag'] = AssociateTag
- self.httpcli = httplib2.Http('.cache')
-
- def buildURL(self, **kw):
- query = []
- kw.update(self.base_params)
- for key, val in kw.items():
- query.append('%s=%s' % (key,quote(val)))
- return self.base_url + '?' + '&'.join(query)
-
- def getFile(self, url):
- # Amazon.com ECS agreement imposes a limit of one request per second
- sleep(1)
- resp, content = self.httpcli.request(url, 'GET')
- self.tree = etree.parse(StringIO(content))
- return resp, content
-
- def buildQPath(path, ns):
- """build a path with fully qualified tags"""
- ns = '{%s}' % ns
- parts = path.split('/')
- return ns+('/'+ns).join(parts)
+CREATOR_TAGS = ['Author', 'Creator']
- def itemLookup(self,itemId,response='ItemAttributes'):
- params = { 'Operation':'ItemLookup',
- 'ItemId':itemId,
- 'ResponseGroup':response
- }
- url = self.buildURL(**params)
- return self.getFile(url)[1]
-
- def findAll(self,path):
- pass
+AMAZON_INVALID_PARAM = 'AWS.InvalidParameterValueXX'
-if __name__=='__main__':
- from amazon_config import ACCESS_KEY_ID, ASSOCIATE_TAG
-
- ecs = AmazonECS(ACCESS_KEY_ID, ASSOCIATE_TAG)
- alice = '0393048470'
- gof = '0201633612'
- awpr = '0977616630'
- oss = '1565925823'
- dup = '0141000511'
- print ecs.itemLookup(oss)
-
-
-
+
+def nsPath(ns, path):
+ parts = path.split('/')
+ return '/'.join([ns+part for part in parts])
+
+def parse(xml):
+ tree = etree.parse(xml)
+ raiz = tree.getroot()
+ # get the XML namespace from the root tag
+ ns = raiz.tag.split('}')[0] + '}'
+ request = raiz.find(nsPath(ns,'Items/Request'))
+ error_code = request.findtext(nsPath(ns,'Errors/Error/Code'))
+ if error_code is None:
+ items = raiz.findall(nsPath(ns,'Items/Item'))
+ #TODO: treat multiple Item elements in Items
+ item = items[0].find(ns+'ItemAttributes')
+ book_dic = {}
+ for field, tag in FIELD_MAP:
+ elem = item.find(ns+tag)
+ if elem is not None:
+ book_dic[field] = elem.text
+ creators = []
+ for tag in CREATOR_TAGS:
+ for elem in item.findall(ns+tag):
+ if elem is None: continue
+ role = elem.attrib.get('Role')
+ if role:
+ creator = '%s (%s)' % (elem.text, role)
+ else:
+ creator = elem.text
+ creators.append(creator)
+ if creators:
+ book_dic['creators'] = creators
+ return book_dic
+
+ elif error_code == AMAZON_INVALID_PARAM:
+ return None
+ else:
+ raise LookupError, error_code
+
+if __name__ == '__main__':
+ import sys
+ from pprint import pprint
+ xml = file(sys.argv[1])
+ dic = parse(xml)
+ pprint(dic)
Added: Sandbox/luciano/kirbi/src/kirbi/fetch/invalid-request.xml
===================================================================
--- Sandbox/luciano/kirbi/src/kirbi/fetch/invalid-request.xml (rev 0)
+++ Sandbox/luciano/kirbi/src/kirbi/fetch/invalid-request.xml 2007-07-31 01:10:02 UTC (rev 78494)
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ItemLookupResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05">
+ <OperationRequest>
+ <HTTPHeaders>
+ <Header Name="UserAgent" Value="Python-httplib2/$Rev: 235 $"></Header>
+ </HTTPHeaders>
+ <RequestId>0NV290TFMXVW0Y09CXTA</RequestId>
+ <Arguments>
+ <Argument Name="AssociateTag" Value="circulante-20"></Argument>
+ <Argument Name="ItemId" Value="1231231239"></Argument>
+ <Argument Name="Service" Value="AWSECommerceService"></Argument>
+ <Argument Name="ResponseGroup" Value="ItemAttributes"></Argument>
+ <Argument Name="Operation" Value="ItemLookup"></Argument>
+ <Argument Name="AWSAccessKeyId" Value="13W2MMDG65QJJK9GG402"></Argument>
+ </Arguments>
+ <RequestProcessingTime>0.0118951797485352</RequestProcessingTime>
+ </OperationRequest>
+ <Items>
+ <Request>
+ <IsValid>True</IsValid>
+ <ItemLookupRequest>
+ <ItemId>1231231239</ItemId>
+ <ResponseGroup>ItemAttributes</ResponseGroup>
+ </ItemLookupRequest>
+ <Errors>
+ <Error>
+ <Code>AWS.InvalidParameterValue</Code>
+ <Message>1231231239 is not a valid value for ItemId. Please change this value and retry your request.</Message>
+ </Error>
+ </Errors>
+ </Request>
+ </Items>
+</ItemLookupResponse>
More information about the Checkins mailing list