1
2
3 __all__ = ['TarFileStorage', 'TarFileStoragePathInfo']
4
5 import os, os.path, time
6 import tarfile
7 from tarfile import TarFile, TarInfo
8 try:
9 from glob import iglob as glob
10 from glob import iglob
11 except ImportError:
12 from glob import glob
13
14 try:
15 from StringIO import cStringIO as StringIO
16 except ImportError:
17 from StringIO import StringIO
18
19 from amplee.storage import Storage, StorageResourceInfo
20 from amplee.utils import safe_path_join
21 from amplee.error import UnknownResource
22
def __init__(self, archive_path, archive_sub_path=None):
    """
    Hold the two locations the TarFileStorage works with.

    Keyword arguments:
    archive_path -- absolute path to the tar archive
    archive_sub_path -- sub path, within the archive, of one of its
    members (defaults to None)
    """
    self.archive_path, self.archive_sub_path = archive_path, archive_sub_path
34
def __init__(self, storage_path, compression='gz', encoding='utf-8'):
    """
    Set up a simple tar archive storage for amplee.

    The storage directory is created when it does not exist yet.

    Keyword arguments:
    storage_path -- absolute path to the top level directory which will
    contain collections and resources

    compression -- None for no compression, 'gz' for a gzip
    compression or 'bz2' for a bzip2 compression scheme

    encoding -- used to encode the path elements
    """
    self.storage_path = storage_path

    # os.mkdir only creates the last path component.
    root_missing = not os.path.exists(storage_path)
    if root_missing:
        os.mkdir(storage_path)

    self.encoding, self.compression = encoding, compression
56
58 """Does nothing effectively."""
59 pass
60
62 """
63 Creates a subdirectory within the storage directory
64 If it already exists does nothing.
65
66 Keyword argument
67 collection_name -- name of the directory to create
68 """
69 path = safe_path_join(self.storage_path, collection_name)
70 if not os.path.exists(path):
71 os.mkdir(path)
72
73 return path
74
def info(self, collection_name, resource_name=None, archive_sub_path=None):
    """
    Returns a StorageResourceInfo which ``key``
    attribute is a TarFileStoragePathInfo.

    The archive path stored in the key is built by joining the storage
    path, the collection name and the archive name derived from
    ``resource_name``.

    Keyword arguments:
    collection_name -- name of the collection holding the archive
    resource_name -- name of the resource the archive name is derived from
    archive_sub_path -- sub path of a member within the archive; it is
    carried as-is into the returned key
    """
    # NOTE: the caller's archive_sub_path used to be overwritten with None
    # here, which made the parameter useless. It is now honoured.
    #
    # The names are deliberately stored as given: the previous
    # ``.encode()`` calls discarded their result and were no-ops, and
    # get_content() and exists() already encode unicode paths with
    # self.encoding right before touching the filesystem or the archive.
    #
    # NOTE(review): resource_name defaults to None but is handed straight
    # to the archive-name helper -- confirm None is acceptable there.
    archive_name = self.__get_archive_name(resource_name)
    archive_path = safe_path_join(self.storage_path, collection_name,
                                  archive_name)
    ti = TarFileStoragePathInfo(archive_path, archive_sub_path)

    return StorageResourceInfo(resource_name, ti, collection_name)
93
95 """
96 Returns the correct file mode to open an archive.
97
98 Keyword arguments:
99 opening_mode -- how to open the archive, either in read mode 'r'
100 or in write mode 'w'
101 """
102 if self.compression == 'gz':
103 return '%s:gz' % opening_mode
104 elif self.compression == 'bz2':
105 return '%s:bz2' % opening_mode
106
107 return opening_mode
108
110 """
111 Returns the name of the archive with the correct extension
112 depending on the compression set on the storage.
113
114 Removes any extension in resource_name.
115
116 Keyword arguments:
117 resource_name -- any resource name value
118 """
119
120 resource_name, ext = os.path.splitext(os.path.basename(resource_name))
121
122 if self.compression == 'gz':
123 return '%s.tar.gz' % resource_name
124 elif self.compression == 'bz2':
125 return '%s.tar.bz2' % resource_name
126
127 return '%s.tar' % resource_name
128
def get_content(self, info):
    """
    Returns a list of tuples (StorageResourceInfo instance, content) read
    from the archive at ``info.key.archive_path``.

    If the info.key.archive_sub_path is not set, then the returned list
    covers all the members of the archive that carry data.

    Otherwise it holds only the tuple matching the
    info.key.archive_sub_path value.

    Members for which tarfile provides no file object (directories for
    instance) are skipped instead of failing.

    Note also that the data read from the archive is not decoded and is
    therefore returned as a byte string.

    Raises UnknownResource when the archive path does not exist or is not
    a tar file. A sub path that is not a member of the archive is reported
    by tarfile itself (KeyError).

    Keyword arguments
    info -- as returned by info()
    """
    path = info.key.archive_path
    if isinstance(path, unicode):
        path = path.encode(self.encoding)

    if not (os.path.exists(path) and tarfile.is_tarfile(path)):
        raise UnknownResource(info.key.archive_path)

    sub_path = info.key.archive_sub_path
    if isinstance(sub_path, unicode):
        sub_path = sub_path.encode(self.encoding)

    results = []
    tar = tarfile.open(path, self.__get_file_mode())
    try:
        if sub_path:
            # The returned key keeps the sub path exactly as the caller
            # provided it; the encoded form is only used for the lookup.
            targets = [(info.key.archive_sub_path, sub_path)]
        else:
            # Generator on purpose: members are extracted while iterating
            # so that the archive is read sequentially.
            targets = ((tarinfo.name, tarinfo) for tarinfo in tar)

        for member_name, member in targets:
            fileobj = tar.extractfile(member)
            if fileobj is None:
                # extractfile() returns None for members without data.
                continue
            ti = TarFileStoragePathInfo(info.key.archive_path, member_name)
            pi = StorageResourceInfo(info.name, ti, info.collection_name)
            results.append((pi, fileobj.read()))
    finally:
        tar.close()

    return results
176
206
def put_content(self, info, content, **kwargs):
    """
    Replaces the tar archive at ``info.key.archive_path`` with a new set
    of content. Nothing is done when ``content`` is empty.

    The ``content`` parameter is a list of tuples of the following form:
    (member_name_of_the_archive, data, data_length).

    member_name_of_the_archive is the name given to the member within
    the archive.

    data is what gets persisted. It must be a byte string, a unicode
    string or a file object which MUST return the full content as a byte
    string on read().

    data_length is the size to read from data and to persist into the
    archive.

    Note that unicode data and unicode names are encoded using the
    encoding set on the storage.

    Keyword arguments
    info -- as returned by info()
    content -- list of tuples of the data to persist
    """
    if not content:
        return

    archive = None
    try:
        target = info.key.archive_path
        if isinstance(target, unicode):
            target = target.encode(self.encoding)

        # Opening in write mode starts a brand new archive at that path.
        archive = tarfile.open(target, self.__get_file_mode('w'))
        for member_name, payload, size in content:
            # Strings are wrapped so that tarfile can read() them.
            if isinstance(payload, unicode):
                payload = payload.encode(self.encoding)
            if isinstance(payload, str):
                payload = StringIO(payload)

            if isinstance(member_name, unicode):
                member_name = member_name.encode(self.encoding)

            entry = TarInfo(member_name)
            entry.uid, entry.gid = os.getuid(), os.getgid()
            entry.mtime = time.time()
            entry.size = size
            archive.addfile(entry, payload)
    finally:
        if archive is not None:
            archive.close()
257
296
def remove_content(self, info):
    """
    Remove the archive at ``info.key.archive_path``.

    A unicode path is first encoded with the encoding set on the storage,
    as get_content(), put_content() and exists() do, so that the file name
    looked up for deletion is the very same one that was written.

    Removal is best-effort: errors reported by the operating system
    (for instance when the archive does not exist) are ignored.

    Keyword arguments
    info -- as returned by info()
    """
    path = info.key.archive_path
    if isinstance(path, unicode):
        path = path.encode(self.encoding)

    try:
        os.unlink(path)
    except OSError:
        # Deliberately ignored, see the docstring.
        pass
308
317
def persist(self, *args, **kwargs):
    """
    Does nothing: any argument is accepted and ignored.
    """
    return None
323
325 """
326 Returns True if the resource at 'info' exists. False otherwise.
327
328 If info.key.archive_sub_path is set it will lookup in the
329 archive if it exists.
330
331 Keyword arguments
332 info -- as returned by info()
333 """
334 path = info.key.archive_path
335 if isinstance(path, unicode):
336 path = path.encode(self.encoding)
337
338 if not (os.path.exists(path) and tarfile.is_tarfile(path)):
339 return False
340
341 if info.key.archive_sub_path:
342 sub_path = info.key.archive_sub_path
343 if isinstance(sub_path, unicode):
344 sub_path = sub_path.encode(self.encoding)
345
346 tar = tarfile.open(path, self.__get_file_mode())
347 try:
348 tar.getmember(sub_path)
349 except KeyError:
350 return False
351
352 return True
353
def ls(self, collection_name, ext=None):
    """
    List resources with the provided extension in a collection

    Returns a dictionary keyed by archive file name. Each value is a
    StorageResourceInfo whose key points at the last member of that
    archive matching ``ext`` (or at its last member when ``ext`` is not
    set). Archives without any matching member are left out.

    Note that this will open each archive within the collection that
    matches the compression set for the storage:
    * 'gz' will result in searching through '*.tar.gz'
    * 'bz2' will result in searching through '*.tar.bz2'
    * None will result in searching through '*.tar'

    The speed of this method will therefore decrease with the number
    of archives stored. Every archive is closed once inspected.

    Keyword arguments
    collection_name -- name of the directory containing all the members
    of a collection. Created if it does not exist.

    ext -- extension of resources to return
    """
    collection_path = self.create_container(collection_name)

    if self.compression == 'gz':
        pattern = '%s/*.tar.gz'
    elif self.compression == 'bz2':
        pattern = '%s/*.tar.bz2'
    else:
        pattern = '%s/*.tar'

    members = {}
    for archive in glob(pattern % (collection_path,)):
        basename = os.path.basename(archive)
        tar = tarfile.open(archive, self.__get_file_mode())
        try:
            for tarinfo in tar:
                if ext and not tarinfo.name.endswith(ext):
                    continue
                ti = TarFileStoragePathInfo(archive, tarinfo.name)
                members[basename] = StorageResourceInfo(basename, ti,
                                                        collection_name)
        finally:
            # Release the file handle even if reading the archive fails.
            tar.close()

    return members
398
399 - def ils(self, collection_name, ext=None):
400 """
401 Yields resources with the provided extension in a collection
402
403 Note that this will open each archive within the collection that
404 matches the compression set for the storage:
405 * 'gz' will result in searching through '*.tar.gz'
406 * 'bz2' will result in searching through '*.tar.bz2'
407 * None will result in searching through '*.tar'
408
409 The speed of this method will therefore decrease with the number
410 of archives stored.
411
412 Keyword arguments
413 collection_name -- name of the directory in the working copy
414 containing all the members of a collection. Created if it does
415 not exists.
416
417 ext -- extension of resources to return
418 """
419 if self.compression == 'gz':
420 archives = iglob('%s/*.tar.gz' % (collection_path,))
421 elif self.compression == 'bz2':
422 archives = iglob('%s/*.tar.bz2' % (collection_path,))
423 else:
424 archives = iglob('%s/*.tar' % (collection_path,))
425
426 for archive in archives:
427 basename = os.path.basename(archive)
428 tar = tarfile.open(archive, self.__get_file_mode())
429 for tarinfo in tar:
430 ti = TarFileStoragePathInfo(archive, tarinfo.name)
431 if ext:
432 if tarinfo.name.endswith(ext):
433 yield basename, StorageResourceInfo(basename, ti,