So I'm working with an API (AWS ElastiCache) that offers mandatory pagination of results. I need to get all results, so I took some time to work out this logic.
def combine_results(function, key, marker=0, **kwargs):
    """Gather the values of ``key`` from every page of a paginated call.

    ``function`` is called repeatedly as ``function(marker=marker, **kwargs)``.
    After each call, the first ``'Marker'`` value found anywhere in the
    response becomes the marker for the next call. Paging stops once that
    value is ``None`` or the response contains no ``'Marker'`` at all.

    Returns a list of every value found under ``key``, in page order.
    """
    results = []
    while marker is not None:
        result = function(marker=marker, **kwargs)
        results.extend(nested_lookup(key, result))
        # A response without any 'Marker' key is treated as the last page
        # instead of failing with IndexError on the empty lookup result.
        markers = nested_lookup('Marker', result)
        marker = markers[0] if markers else None
    return results
Not only is the AWS ElastiCache API paginated, but its results are also
deeply nested in lists and dicts.
I use this to burn it with fire:
def nested_lookup(key, dictionary):
    """Return a list of every value stored under ``key`` in a nested structure.

    This is the eager counterpart of ``_nested_lookup``: the generator is
    drained completely and its values are handed back as a list.
    """
    matches = _nested_lookup(key, dictionary)
    return list(matches)
def _nested_lookup(key, dictionary):
"""
Lookup a key in a nested dictionary, return value
Authors: Dougles Miranda and Russell Ballestrini
"""
if isinstance(dictionary, list):
for d in dictionary:
for result in _nested_lookup(key, d):
yield result
if isinstance(dictionary, dict):
for k, v in dictionary.iteritems():
if k == key:
yield v
elif isinstance(v, dict):
for result in _nested_lookup(key, v):
yield result
elif isinstance(v, list):
for d in v:
for result in _nested_lookup(key, d):
yield result
The end result is that we have access to paginated and deeply nested data through a simple-to-use function:
>>> from lib import combine_results, nested_lookup
>>> d = elasticache_connection.describe_cache_clusters()
>>> nested_lookup('CacheClusterId', d)
[u'demo04-a-redis', u'demo04-b-redis', u'demo06-a-redis', u'demo06-b-redis', u'test-a-memcached', u'test-b-redis', u'ops01-redis', u'qa01-redis', u'ops02-redis', u'qa02-redis', u'int01-a-redis', u'int01-b-redis', u'ops03-redis', u'ops04-redis']
Here are some unit tests to prove these functions work as expected:
from unittest import TestCase
from lib.util import (
combine_results,
nested_lookup,
_nested_lookup,
)
def my_func_that_paginates(max_results=3, marker=0):
    """Mock a paginated AWS description call over ten fixed records.

    The data set is ``{'desired_key': 0}`` through ``{'desired_key': 9}``.
    ``marker`` is the index of the first record to return and
    ``max_results`` is the page size.

    Returns a dict holding the page under ``'results'`` and, under
    ``'Marker'``, the index at which the next page starts, or ``None``
    when this page reaches the end of the data.
    """
    data = [{'desired_key': number} for number in range(10)]
    new_marker = marker + max_results
    if new_marker >= len(data):
        # last page! ``>=`` (not ``>``) so that a page ending exactly on
        # the final record is flagged as last rather than being followed
        # by an extra, empty page.
        return {'results': data[marker:], 'Marker': None}
    return {'results': data[marker:new_marker], 'Marker': new_marker}
class TestCombineResults(TestCase):
    """Check that combine_results walks every page of a paginated call."""

    def test_combine_results_returns_all_results(self):
        # Compare as an ordered list rather than as a set: a set would hide
        # results duplicated across pages or returned out of order.
        expected = list(range(10))
        results = combine_results(my_func_that_paginates, 'desired_key')
        self.assertEqual(expected, results)
class TestNestedLookup(TestCase):
    """Exercise nested_lookup against one dict wrapped in various containers."""

    def setUp(self):
        # Two nested dicts each hold a 'd' key; every test must find both.
        self.subject_dict = {'a': 1, 'b': {'d': 100}, 'c': {'d': 200}}

    def _assert_both_d_values_found(self, results):
        """Assert that ``results`` holds exactly the values 100 and 200."""
        self.assertEqual(2, len(results))
        for wanted in (100, 200):
            self.assertIn(wanted, results)
        self.assertSetEqual({100, 200}, set(results))

    def test_nested_lookup(self):
        found = nested_lookup('d', self.subject_dict)
        self._assert_both_d_values_found(found)

    def test_nested_lookup_wrapped_in_list(self):
        wrapped = [{}, self.subject_dict, {}]
        self._assert_both_d_values_found(nested_lookup('d', wrapped))

    def test_nested_lookup_wrapped_in_list_in_dict_in_list(self):
        wrapped = [{}, {'H': [self.subject_dict]}]
        self._assert_both_d_values_found(nested_lookup('d', wrapped))

    def test_nested_lookup_wrapped_in_list_in_list(self):
        wrapped = [{}, [self.subject_dict, {}]]
        self._assert_both_d_values_found(nested_lookup('d', wrapped))
With this test, the steps of the algorithm look like this:
{'Marker': 3, 'results': [{'desired_key': 0}, {'desired_key': 1}, {'desired_key': 2}]}
3
[0, 1, 2]
[0, 1, 2]
{'Marker': 6, 'results': [{'desired_key': 3}, {'desired_key': 4}, {'desired_key': 5}]}
6
[3, 4, 5]
[0, 1, 2, 3, 4, 5]
{'Marker': 9, 'results': [{'desired_key': 6}, {'desired_key': 7}, {'desired_key': 8}]}
9
[6, 7, 8]
[0, 1, 2, 3, 4, 5, 6, 7, 8]
{'Marker': None, 'results': [{'desired_key': 9}]}
None
[9]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
ok