1
2
3 __doc__ = """Helper to find the best matching media-types.
4
5 Synopsis
6 --------
7
8 The Atom Publishing Protocol is very sensitive to the media-type
9 of resources it treats. Doing a simple string comparison is
10 certainly not enough in this case since media-types sent by
11 user-agents can be of different forms. This module provides only
12 one method called `get_best_mimetype` which does all the hard work
13 of finding the best matching media-type given a seed and a list
14 of acceptable media-types.
15
16 """
17
18 __all__ = ['get_best_mimetype']
19
20
21
22 import re
23
25 """An element (with parameters) from an HTTP header's element list."""
26
28 self.value = value
29 if params is None:
30 params = {}
31 self.params = params
32
34 p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()]
35 return u"%s%s" % (self.value, "".join(p))
36
39
41 """Transform 'token;key=val' to ('token', {'key': 'val'})."""
42
43
44 atoms = [x.strip() for x in elementstr.split(";")]
45 initial_value = atoms.pop(0).strip()
46 params = {}
47 for atom in atoms:
48 atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
49 key = atom.pop(0)
50 if atom:
51 val = atom[0]
52 else:
53 val = ""
54 params[key] = val
55 return initial_value, params
56 parse = staticmethod(parse)
57
59 """Construct an instance from a string of the form 'token;key=val'."""
60 ival, params = cls.parse(elementstr)
61 return cls(ival, params)
62 from_str = classmethod(from_str)
63
64
65 q_separator = re.compile(r'; *q *=')
66
68 """An element (with parameters) from an Accept-* header's element list."""
69
85 from_str = classmethod(from_str)
86
88 val = self.params.get("q", "1")
89 if isinstance(val, HeaderElement):
90 val = val.value
91 return float(val)
92 qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")
93
95
96
97 diff = cmp(other.qvalue, self.qvalue)
98 if diff == 0:
99 diff = cmp(str(other), str(self))
100 return diff
101
102
103
104
def _split_media_type(value):
    """Split ``'type/subtype'`` into ``(type, subtype)``.

    Returns ``(None, None)`` when ``value`` contains no ``'/'``. Only the
    first ``'/'`` is significant so that a malformed value holding several
    slashes cannot raise while unpacking.
    """
    if '/' not in value:
        return None, None
    media_type, sub_type = value.split('/', 1)
    return media_type, sub_type


def _split_suffix(sub_type):
    """Split a ``'left+right'`` subtype into ``(left, right)``.

    Returns ``(None, None)`` when ``sub_type`` is empty or has no ``'+'``.
    The split happens on the last ``'+'`` so that subtypes holding several
    of them (``'vnd.a+b+xml'``) do not raise while unpacking.
    """
    if not sub_type or '+' not in sub_type:
        return None, None
    left_token, right_token = sub_type.rsplit('+', 1)
    return left_token, right_token


def _parse_acceptable(within):
    """Parse every entry of ``within`` once into an ``AcceptElement``.

    The special entry ``'entry'`` (case-insensitive) is an alias for
    ``'application/atom+xml;type=entry'``.
    """
    parsed = []
    for raw in within:
        if raw.lower() == 'entry':
            raw = 'application/atom+xml;type=entry'
        parsed.append(AcceptElement.from_str(raw))
    return parsed


def _match_media_range(header, item):
    """Return the element to report when ``header`` matches ``item``.

    ``header`` is one element of the Accept header and ``item`` one parsed
    acceptable media-type. The result is ``header`` when the header names a
    concrete type, ``item`` when the header used a wildcard that ``item``
    satisfies, and ``None`` when the two do not match.
    """
    media_type, sub_type = _split_media_type(item.value)
    if media_type is None:
        # An acceptable value without '/' only matches by strict equality.
        if header.value == item.value:
            return header
        return None

    header_media_type, header_sub_type = _split_media_type(header.value)
    if header_media_type != media_type:
        return None

    matched = None
    if sub_type == '*':
        matched = header
    elif '+' in sub_type:
        left_token, right_token = _split_suffix(sub_type)
        header_left_token, header_right_token = _split_suffix(header_sub_type)
        if right_token == header_right_token:
            if left_token == header_left_token:
                matched = header
            elif header_left_token == '*':
                matched = item
            elif left_token == '*':
                matched = header
    elif header_sub_type == sub_type:
        matched = header

    # 'type/*' in the Accept header accepts any subtype of the same type.
    if matched is None and header_sub_type == '*':
        matched = item
    return matched


def _media_params(element):
    """Return the parameters of ``element`` without the ``q`` accept-param.

    The quality value is kept in ``params`` under the ``'q'`` key but it is
    a ranking hint, not a media-type parameter, so it must not take part in
    parameter matching.
    """
    return dict((key, val) for key, val in element.params.items() if key != 'q')


def _param_value_matches(header_value, item_value):
    """Tell whether a header parameter equals the acceptable one, quoted or not."""
    return header_value in (item_value, '"%s"' % item_value)


def _params_match(header, item, check_params):
    """Apply the ``check_params`` policy to a header/acceptable pair.

    * list/tuple of keys: when neither side has parameters the pair matches;
      otherwise at least one listed key must be present on both sides and
      every listed key present on both sides must carry the same value.
    * ``True``: when either side has no parameters the pair matches;
      otherwise both sides must have exactly the same keys and values.
    * anything else: no match.
    """
    header_params = _media_params(header)
    item_params = _media_params(item)

    if isinstance(check_params, (list, tuple)):
        if not header_params and not item_params:
            return True
        matched = False
        for key in check_params:
            if key in header_params and key in item_params:
                if not _param_value_matches(header_params[key], item_params[key]):
                    return False
                matched = True
        return matched

    # Equality (not identity) is kept on purpose so that 1 still means True.
    if check_params == True:
        if not header_params or not item_params:
            return True
        if sorted(header_params) != sorted(item_params):
            return False
        for key in item_params:
            if not _param_value_matches(header_params[key], item_params[key]):
                return False
        return True

    return False


def get_best_mimetype(header_value, within, default=None, check_params=False, return_full=False):
    """Iterates through 'header_value' and checks if it finds any match in 'within'.

    When */* is part of header_value and no candidate was found this
    function returns ``default`` if one was given, otherwise the first
    media-type of 'within'.

    Consider the following examples:

    >>> from amplee.http_helper import get_best_mimetype
    >>> l = ['application/rdf+xml', 'application/atom+xml']
    >>> a = 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'
    >>> get_best_mimetype(a, l)
    'application/rdf+xml'
    >>> get_best_mimetype(a, l, 'application/atom+xml')
    'application/atom+xml'
    >>> a = 'text/xml,application/xml,application/atom+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'
    >>> get_best_mimetype(a, l)
    'application/atom+xml'
    >>> l = ['application/rdf+xml', 'entry']
    >>> get_best_mimetype(a, l)
    'application/atom+xml'
    >>> a = 'text/xml,application/xml,text/html;q=0.9,text/plain;q=0.8,image/png'
    >>> get_best_mimetype(a, l) # returns None
    >>> a = 'application/xml;q=0.9,application/rdf+xml;q=0.8,application/atom+xml'
    >>> l = ['application/rdf+xml', 'entry']
    >>> get_best_mimetype(a, l)
    'application/atom+xml'
    >>> a = 'application/xml;q=0.9,application/rdf+xml;q=0.8,application/atom+xml;q=0.1'
    >>> get_best_mimetype(a, l)
    'application/rdf+xml'
    >>> a = 'application/xml,application/rdf+xml;q=0.8,application/atom+xml'
    >>> get_best_mimetype(a, l)
    'application/atom+xml'
    >>> l = ['application/rdf+xml', 'application/atom+xml', 'application/xhtml+xml']
    >>> a = 'application/*'
    >>> get_best_mimetype(a, l)
    'application/xhtml+xml'
    >>> l = [u'application/atom+xml;type=entry', u'application/x-www-form-urlencoded']
    >>> a = u'application/atom+xml;type=entry;some=yu'
    >>> get_best_mimetype(a, l, check_params=True) # returns None
    >>> a = u'application/atom+xml;type=entry'
    >>> get_best_mimetype(a, l, check_params=True)
    u'application/atom+xml'
    >>> get_best_mimetype(a, l, check_params=True, return_full=True)
    u'application/atom+xml;type=entry'
    >>> a = u'application/atom+xml;type=entry;some=yu'
    >>> get_best_mimetype(a, l, check_params=['type'])
    u'application/atom+xml'
    >>> get_best_mimetype(a, l)
    u'application/atom+xml'

    The ``header_value`` is a string respecting the HTTP Accept header format
    as defined in section 14.1 of RFC 2616. It is lowercased before being
    parsed and empty elements (stray commas) are ignored. An empty
    ``header_value`` makes the function return ``default`` untouched.

    The ``within`` argument is a list of acceptable media-type strings. The
    special value ``'entry'`` stands for ``application/atom+xml;type=entry``.

    The ``default`` value is returned when no match was found.

    The ``check_params``, if provided, may be a list (or tuple) of keys
    (string) that should be matched between ``header_value`` and headers
    ``within`` or it can be a boolean. If ``True`` then every parameter will
    be tested, if ``False`` (default) the test won't occur. Setting it to
    ``True`` ensures that if a match is found it will be exactly the one
    wanted but this is a more restrictive matching scheme. The ``q``
    accept-param never takes part in this comparison.

    If ``return_full`` is ``True`` it returns the media-type along with its
    parameters (their order is not guaranteed). Otherwise it returns only
    the ``media-type``.

    Among several matches the one whose Accept element has the highest
    qvalue wins; on equal qvalues the last match found wins.
    """
    if not header_value:
        return default

    # Parsing the acceptable media-types does not depend on the Accept
    # element being examined, so it is done once up front.
    acceptable = _parse_acceptable(within)

    # An empty list is a legitimate (if useless) value for check_params, so
    # only a value equal to False disables the parameter test.
    verify_params = check_params != False

    candidate = None
    candidate_header = None
    match_any = False
    for token in header_value.split(','):
        token = token.strip().lower()
        if not token:
            continue

        header = AcceptElement.from_str(token)
        if header.value == '*/*':
            match_any = True

        for item in acceptable:
            mimetype = _match_media_range(header, item)
            if mimetype is None:
                continue

            if verify_params and not _params_match(header, item, check_params):
                continue

            # Rank by the qvalue the client attached to the Accept element,
            # even when the reported media-type comes from ``within``
            # (wildcard match), which carries no qvalue of its own.
            if candidate is None or header.qvalue >= candidate_header.qvalue:
                candidate = mimetype
                candidate_header = header

    if candidate is None:
        if default:
            candidate = AcceptElement.from_str(default)
        elif match_any and acceptable:
            # The parsed form is used so that the 'entry' alias is reported
            # as the media-type it stands for.
            candidate = acceptable[0]

    if candidate is None:
        return None

    if return_full:
        return unicode(candidate)

    return candidate.value
283