diff --git a/swift/proxy/controllers/container.py b/swift/proxy/controllers/container.py
index 509245dd13..884b973df4 100644
--- a/swift/proxy/controllers/container.py
+++ b/swift/proxy/controllers/container.py
@@ -108,8 +108,9 @@ class ContainerController(Controller):
             req.swift_entity_path, concurrency)
         return resp
 
-    def _filter_resp_shard_ranges(self, req, cached_ranges):
-        # filter returned shard ranges according to request constraints
+    def _make_shard_ranges_response_body(self, req, shard_range_dicts):
+        # filter shard ranges according to request constraints and return a
+        # serialised list of shard ranges
         marker = get_param(req, 'marker', '')
         end_marker = get_param(req, 'end_marker')
         includes = get_param(req, 'includes')
@@ -118,73 +119,100 @@ class ContainerController(Controller):
             marker, end_marker = end_marker, marker
         shard_ranges = [
             ShardRange.from_dict(shard_range)
-            for shard_range in cached_ranges]
+            for shard_range in shard_range_dicts]
         shard_ranges = filter_shard_ranges(shard_ranges, includes,
                                            marker, end_marker)
         if reverse:
             shard_ranges.reverse()
         return json.dumps([dict(sr) for sr in shard_ranges]).encode('ascii')
 
-    def _GET_using_cache(self, req, info):
-        # It may be possible to fulfil the request from cache: we only reach
-        # here if request record_type is 'shard' or 'auto', so if the container
-        # state is 'sharded' then look for cached shard ranges. However, if
-        # X-Newest is true then we always fetch from the backend servers.
-        get_newest = config_true_value(req.headers.get('x-newest', False))
-        if get_newest:
-            self.logger.debug(
-                'Skipping shard cache lookup (x-newest) for %s', req.path_qs)
-        elif (info and is_success(info['status']) and
-                info.get('sharding_state') == 'sharded'):
-            # container is sharded so we may have the shard ranges cached
-            headers = headers_from_container_info(info)
-            if headers:
-                # only use cached values if all required headers available
-                infocache = req.environ.setdefault('swift.infocache', {})
-                memcache = cache_from_env(req.environ, True)
-                cache_key = get_cache_key(self.account_name,
-                                          self.container_name,
-                                          shard='listing')
-                cached_ranges = infocache.get(cache_key)
-                if cached_ranges is None and memcache:
-                    skip_chance = \
-                        self.app.container_listing_shard_ranges_skip_cache
-                    if skip_chance and random.random() < skip_chance:
-                        self.logger.increment('shard_listing.cache.skip')
+    def _get_shard_ranges_from_cache(self, req, info):
+        headers = headers_from_container_info(info)
+        if not headers:
+            # only use cached values if all required headers available
+            return None
+
+        infocache = req.environ.setdefault('swift.infocache', {})
+        memcache = cache_from_env(req.environ, True)
+        cache_key = get_cache_key(self.account_name,
+                                  self.container_name,
+                                  shard='listing')
+
+        resp_body = None
+        cached_range_dicts = infocache.get(cache_key)
+        if cached_range_dicts:
+            resp_body = self._make_shard_ranges_response_body(
+                req, cached_range_dicts)
+        elif memcache:
+            skip_chance = \
+                self.app.container_listing_shard_ranges_skip_cache
+            if skip_chance and random.random() < skip_chance:
+                self.logger.increment('shard_listing.cache.skip')
+            else:
+                try:
+                    cached_range_dicts = memcache.get(
+                        cache_key, raise_on_error=True)
+                    if cached_range_dicts:
+                        cache_state = 'hit'
+                        resp_body = self._make_shard_ranges_response_body(
+                            req, cached_range_dicts)
                     else:
-                        try:
-                            cached_ranges = memcache.get(
-                                cache_key, raise_on_error=True)
-                            cache_state = 'hit' if cached_ranges else 'miss'
-                        except MemcacheConnectionError:
-                            cache_state = 'error'
-                        self.logger.increment(
-                            'shard_listing.cache.%s' % cache_state)
+                        cache_state = 'miss'
+                except MemcacheConnectionError:
+                    cache_state = 'error'
+                self.logger.increment(
+                    'shard_listing.cache.%s' % cache_state)
 
-                if cached_ranges is not None:
-                    infocache[cache_key] = tuple(cached_ranges)
-                    # shard ranges can be returned from cache
-                    self.logger.debug('Found %d shards in cache for %s',
-                                      len(cached_ranges), req.path_qs)
-                    headers.update({'x-backend-record-type': 'shard',
-                                    'x-backend-cached-results': 'true'})
-                    shard_range_body = self._filter_resp_shard_ranges(
-                        req, cached_ranges)
-                    # mimic GetOrHeadHandler.get_working_response...
-                    # note: server sets charset with content_type but proxy
-                    # GETorHEAD_base does not, so don't set it here either
-                    resp = Response(request=req, body=shard_range_body)
-                    update_headers(resp, headers)
-                    resp.last_modified = Timestamp(
-                        headers['x-put-timestamp']).ceil()
-                    resp.environ['swift_x_timestamp'] = headers.get(
-                        'x-timestamp')
-                    resp.accept_ranges = 'bytes'
-                    resp.content_type = 'application/json'
-                    return resp
+        if resp_body is None:
+            resp = None
+        else:
+            # shard ranges can be returned from cache
+            infocache[cache_key] = tuple(cached_range_dicts)
+            self.logger.debug('Found %d shards in cache for %s',
+                              len(cached_range_dicts), req.path_qs)
+            headers.update({'x-backend-record-type': 'shard',
+                            'x-backend-cached-results': 'true'})
+            # mimic GetOrHeadHandler.get_working_response...
+            # note: server sets charset with content_type but proxy
+            # GETorHEAD_base does not, so don't set it here either
+            resp = Response(request=req, body=resp_body)
+            update_headers(resp, headers)
+            resp.last_modified = Timestamp(headers['x-put-timestamp']).ceil()
+            resp.environ['swift_x_timestamp'] = headers.get('x-timestamp')
+            resp.accept_ranges = 'bytes'
+            resp.content_type = 'application/json'
 
-        # The request was not fulfilled from cache so send to the backend
-        # server, but instruct the backend server to ignore name constraints in
+        return resp
+
+    def _store_shard_ranges_in_cache(self, req, resp):
+        # parse shard ranges returned from backend, store them in infocache and
+        # memcache, and return a list of dicts
+        cache_key = get_cache_key(self.account_name, self.container_name,
+                                  shard='listing')
+        data = self._parse_listing_response(req, resp)
+        backend_shard_ranges = self._parse_shard_ranges(req, data, resp)
+        if backend_shard_ranges is None:
+            return None
+
+        cached_range_dicts = [dict(sr) for sr in backend_shard_ranges]
+        if resp.headers.get('x-backend-sharding-state') == 'sharded':
+            # cache in infocache even if no shard ranges returned; this
+            # is unexpected but use that result for this request
+            infocache = req.environ.setdefault('swift.infocache', {})
+            infocache[cache_key] = tuple(cached_range_dicts)
+            memcache = cache_from_env(req.environ, True)
+            if memcache and cached_range_dicts:
+                # cache in memcache only if shard ranges as expected
+                self.logger.debug('Caching %d shards for %s',
                                  len(cached_range_dicts), req.path_qs)
+                memcache.set(cache_key, cached_range_dicts,
+                             time=self.app.recheck_listing_shard_ranges)
+        return cached_range_dicts
+
+    def _get_shard_ranges_from_backend(self, req):
+        # Make a backend request for shard ranges. The response is cached and
+        # then returned as a list of dicts.
+        # Note: We instruct the backend server to ignore name constraints in
         # request params if returning shard ranges so that the response can
         # potentially be cached. Only do this if the container state is
         # 'sharded'. We don't attempt to cache shard ranges for a 'sharding'
@@ -209,33 +237,30 @@ class ContainerController(Controller):
         if (resp_record_type == 'shard' and
                 sharding_state == 'sharded' and
                 complete_listing):
-            # backend returned unfiltered listing state shard ranges so parse
-            # them and replace response body with filtered listing
-            cache_key = get_cache_key(self.account_name, self.container_name,
-                                      shard='listing')
-            data = self._parse_listing_response(req, resp)
-            backend_shard_ranges = self._parse_shard_ranges(req, data, resp)
-            if backend_shard_ranges is not None:
-                cached_ranges = [dict(sr) for sr in backend_shard_ranges]
-                if resp.headers.get('x-backend-sharding-state') == 'sharded':
-                    # cache in infocache even if no shard ranges returned; this
-                    # is unexpected but use that result for this request
-                    infocache = req.environ.setdefault('swift.infocache', {})
-                    infocache[cache_key] = tuple(cached_ranges)
-                    memcache = cache_from_env(req.environ, True)
-                    if memcache and cached_ranges:
-                        # cache in memcache only if shard ranges as expected
-                        self.logger.debug('Caching %d shards for %s',
-                                          len(cached_ranges), req.path_qs)
-                        memcache.set(
-                            cache_key, cached_ranges,
-                            time=self.app.recheck_listing_shard_ranges)
-
-                # filter returned shard ranges according to request constraints
-                resp.body = self._filter_resp_shard_ranges(req, cached_ranges)
-
+            cached_range_dicts = self._store_shard_ranges_in_cache(req, resp)
+            if cached_range_dicts:
+                resp.body = self._make_shard_ranges_response_body(
+                    req, cached_range_dicts)
         return resp
 
+    def _GET_using_cache(self, req, info):
+        # It may be possible to fulfil the request from cache: we only reach
+        # here if request record_type is 'shard' or 'auto', so if the container
+        # state is 'sharded' then look for cached shard ranges. However, if
+        # X-Newest is true then we always fetch from the backend servers.
+        get_newest = config_true_value(req.headers.get('x-newest', False))
+        if get_newest:
+            self.logger.debug(
+                'Skipping shard cache lookup (x-newest) for %s', req.path_qs)
+        elif (info and is_success(info['status']) and
+                info.get('sharding_state') == 'sharded'):
+            # container is sharded so we may have the shard ranges cached
+            resp = self._get_shard_ranges_from_cache(req, info)
+            if resp:
+                return resp
+        # The request was not fulfilled from cache so send to backend server
+        return self._get_shard_ranges_from_backend(req)
+
     def GETorHEAD(self, req):
         """Handler for HTTP GET/HEAD requests."""
         ai = self.account_info(self.account_name, req)
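
Reviewer note: the probabilistic cache skip in `_get_shard_ranges_from_cache` (driven by `container_listing_shard_ranges_skip_cache`) deliberately sends a small fraction of eligible requests past memcache to the backend, where `_store_shard_ranges_in_cache` repopulates the cache, so cached shard-range listings keep getting refreshed. A minimal standalone sketch of that pattern follows, using only the standard library; `get_listing`, `fetch_from_backend`, `cache` and the 0.001 default are hypothetical stand-ins for illustration, not Swift code:

```python
import random


def get_listing(key, cache, fetch_from_backend, skip_chance=0.001):
    """Illustrative cache lookup with a probabilistic skip.

    A fraction ``skip_chance`` of calls ignores the cache entirely and
    refreshes it from the backend, mirroring in spirit the proxy's
    container_listing_shard_ranges_skip_cache option used above.
    """
    if skip_chance and random.random() < skip_chance:
        value = fetch_from_backend(key)   # deliberate skip: refresh the cache
        cache[key] = value
        return value, 'skip'
    if key in cache:
        return cache[key], 'hit'          # served from cache
    value = fetch_from_backend(key)       # genuine miss: populate the cache
    cache[key] = value
    return value, 'miss'
```

The practical effect, assuming the sketch above reflects the intent of the option, is that even with a long memcache TTL roughly `skip_chance` of listings are re-fetched from the container servers, so the cached shard ranges do not go stale for every proxy at once.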