Package amplee :: Package storage :: Module storetar
[hide private]
[frames] | no frames]

Source Code for Module amplee.storage.storetar

  1  # -*- coding: utf-8 -*- 
  2   
  3  __all__ = ['TarFileStorage', 'TarFileStoragePathInfo'] 
  4   
  5  import os, os.path, time 
  6  import tarfile 
  7  from tarfile import TarFile, TarInfo 
  8  try: 
  9      from glob import iglob as glob 
 10      from glob import iglob 
 11  except ImportError: 
 12      from glob import glob 
 13       
 14  try: 
 15      from StringIO import cStringIO as StringIO 
 16  except ImportError: 
 17      from StringIO import StringIO 
 18   
 19  from amplee.storage import Storage, StorageResourceInfo 
 20  from amplee.utils import safe_path_join 
 21  from amplee.error import UnknownResource 
 22   
class TarFileStoragePathInfo(object):
    """
    Pair of locations used as the ``key`` of resources handled by
    the TarFileStorage: the archive on disk and, optionally, one
    member inside that archive.
    """

    def __init__(self, archive_path, archive_sub_path=None):
        """
        Record the two values that matter to the TarFileStorage.

        Keyword arguments:
        archive_path -- absolute path to the tar archive
        archive_sub_path -- sub path within the archive of any of its
        members (None when no specific member is targeted)
        """
        self.archive_sub_path = archive_sub_path
        self.archive_path = archive_path
34
35 -class TarFileStorage(Storage):
36 - def __init__(self, storage_path, compression='gz', encoding='utf-8'):
37 """ 38 Simple tar archive storage for amplee. 39 40 If storage path does not exist, it is created. 41 42 Keyword arguments: 43 storage_path -- absolute path to the top level directory which will contain 44 collections and resources 45 46 compression -- should be either None for no compression, gz for a gzip 47 compression or bz2 for a bzip2 compression scheme. 48 49 encoding -- used to encode the path elements 50 """ 51 self.storage_path = storage_path 52 if not os.path.exists(self.storage_path): 53 os.mkdir(self.storage_path) 54 self.encoding = encoding 55 self.compression = compression
56
57 - def shutdown(self):
58 """Does nothing effectively.""" 59 pass
60
61 - def create_container(self, collection_name):
62 """ 63 Creates a subdirectory within the storage directory 64 If it already exists does nothing. 65 66 Keyword argument 67 collection_name -- name of the directory to create 68 """ 69 path = safe_path_join(self.storage_path, collection_name) 70 if not os.path.exists(path): 71 os.mkdir(path) 72 73 return path
74
75 - def info(self, collection_name, resource_name=None, archive_sub_path=None):
76 """ 77 Returns a StorageResourceInfo which ``key`` 78 attribute is a TarFileStoragePathInfo. 79 """ 80 archive_sub_path = None 81 82 if isinstance(resource_name, unicode): 83 resource_name.encode(self.encoding) 84 85 if isinstance(archive_sub_path, unicode): 86 archive_sub_path.encode(self.encoding) 87 88 ti = TarFileStoragePathInfo(safe_path_join(self.storage_path, collection_name, 89 self.__get_archive_name(resource_name)), 90 archive_sub_path) 91 92 return StorageResourceInfo(resource_name, ti, collection_name)
93
94 - def __get_file_mode(self, opening_mode='r'):
95 """ 96 Returns teh correct file mode to open an archive. 97 98 Keyword arguments: 99 opening_mode -- how to open the archive, either in read mode 'r' 100 or in write mode 'w' 101 """ 102 if self.compression == 'gz': 103 return '%s:gz' % opening_mode 104 elif self.compression == 'bz2': 105 return '%s:bz2' % opening_mode 106 107 return opening_mode
108
109 - def __get_archive_name(self, resource_name):
110 """ 111 Returns the name of the archive with the correct extension 112 depending on the compression set on the storage. 113 114 Removes any extension in resource_name. 115 116 Keyword arguments: 117 resoure_name -- any resource name value 118 """ 119 # Just ensure we only return the last part of the path 120 resource_name, ext = os.path.splitext(os.path.basename(resource_name)) 121 122 if self.compression == 'gz': 123 return '%s.tar.gz' % resource_name 124 elif self.compression == 'bz2': 125 return '%s.tar.bz2' % resource_name 126 127 return '%s.tar' % resource_name
128
129 - def get_content(self, info):
130 """ 131 Returns a list of tuples (StorageResourceInfo instance, content) based on 132 provided path. 133 134 If the info.key.archive_sub_path is None, then the returned list contains 135 all the members of the archive and therefore its entire content. 136 137 Otherwise it returns only one single tuple matching the info.key.archive_sub_path 138 value. 139 140 Note also that the data read from the archive is not decoded and is 141 therefore returned as a byte string. 142 143 Keyword arguments 144 info -- as returned by info() 145 """ 146 path = info.key.archive_path 147 if isinstance(path, unicode): 148 path = path.encode(self.encoding) 149 150 results = [] 151 if os.path.exists(path) and tarfile.is_tarfile(path): 152 tar = None 153 try: 154 sub_path = info.key.archive_sub_path 155 if isinstance(sub_path, unicode): 156 sub_path = sub_path.encode(self.encoding) 157 158 tar = tarfile.open(path, self.__get_file_mode()) 159 if sub_path: 160 fileobj = tar.extractfile(sub_path) 161 ti = TarFileStoragePathInfo(info.key.archive_path, info.key.archive_sub_path) 162 pi = StorageResourceInfo(info.name, ti, info.collection_name) 163 results.append((pi, fileobj.read())) 164 else: 165 for tarinfo in tar: 166 ti = TarFileStoragePathInfo(info.key.archive_path, tarinfo.name) 167 pi = StorageResourceInfo(info.name, ti, info.collection_name) 168 fileobj = tar.extractfile(tarinfo) 169 results.append((pi, fileobj.read())) 170 finally: 171 if tar: 172 tar.close() 173 return results 174 175 raise UnknownResource(info.key.archive_path)
176
177 - def get_meta_data(self, info):
178 """ 179 Returns a byte string of the atom entry representing the member resource. 180 181 Note also that the data read from the archive is not decoded and is 182 therefore returned as a byte string. 183 184 Keyword arguments 185 info -- as returned by info() 186 """ 187 path = info.key.archive_path 188 if isinstance(path, unicode): 189 path = path.encode(self.encoding) 190 191 if os.path.exists(path) and tarfile.is_tarfile(path): 192 tar = None 193 try: 194 tar = tarfile.open(path, self.__get_file_mode()) 195 for tarinfo in tar: 196 fileobj = tar.extractfile(tarinfo) 197 ti = TarFileStoragePathInfo(info.key.archive_path, tarinfo.name) 198 pi = StorageResourceInfo(info.name, ti, info.collection_name) 199 # Should be the first entry in the archive (actually the only one) 200 return fileobj.read() 201 finally: 202 if tar: 203 tar.close() 204 205 raise UnknownResource(info.key.archive_path)
206
207 - def put_content(self, info, content, **kwargs):
208 """ 209 Replaces the tar achive at ``info.key`` with a new set of content. 210 211 The ``content`` parameter is a list of tuples of the following form: 212 (member_name_of_the_archive, data, data_length). 213 214 The first part of the tuple is the name used as the member of the archive 215 216 The second part is the data to be persisted. Thus must be a byte string, 217 unicode string or a fileobject which MUST return the full 218 content as a byte string on read() 219 220 The last part is the size to read from data and to persist into the 221 archive. 222 223 Note that if the data is unicode, it will be encoded using UTF-8. 224 225 Keyword arguments 226 info -- as returned by info() 227 content -- list of tuples of the data to persist 228 """ 229 if not content: 230 return 231 232 tar = None 233 try: 234 path = info.key.archive_path 235 if isinstance(path, unicode): 236 path = path.encode(self.encoding) 237 238 tar = tarfile.open(path, self.__get_file_mode('w')) 239 for (sub_path, data, length) in content: 240 if isinstance(data, str): 241 data = StringIO(data) 242 elif isinstance(data, unicode): 243 data = StringIO(data.encode(self.encoding)) 244 245 if isinstance(sub_path, unicode): 246 sub_path = sub_path.encode(self.encoding) 247 248 ti = TarInfo(sub_path) 249 ti.uid = os.getuid() 250 ti.gid = os.getgid() 251 ti.mtime = time.time() 252 ti.size = length 253 tar.addfile(ti, data) 254 finally: 255 if tar: 256 tar.close()
257
258 - def put_meta_data(self, info, content, **kwargs):
259 """ 260 Replaces the tar achive at ``info.key`` with a new set of content. 261 262 Keyword arguments 263 info -- as returned by info() 264 265 content -- byte string, unicode string or a file object 266 which returns the full data as a byte string on read() and 267 return the correct length on __len__ 268 """ 269 tar = None 270 try: 271 path = info.key.archive_path 272 if isinstance(path, unicode): 273 path = path.encode(self.encoding) 274 275 sub_path = info.key.archive_sub_path 276 if isinstance(sub_path, unicode): 277 sub_path = sub_path.encode(self.encoding) 278 279 tar = tarfile.open(path, self.__get_file_mode('w')) 280 ti = TarInfo(sub_path or 'mle.atom') 281 ti.uid = os.getuid() 282 ti.gid = os.getgid() 283 ti.mtime = time.time() 284 if isinstance(content, str): 285 ti.size = len(content) 286 content = StringIO(content) 287 elif isinstance(content, unicode): 288 ti.size = len(content) 289 content = StringIO(content.encode(self.encoding)) 290 else: 291 ti.size = len(content) 292 tar.addfile(ti, content) 293 finally: 294 if tar: 295 tar.close()
296
297 - def remove_content(self, info):
298 """ 299 Remove the resource at 'info.key' 300 301 Keyword arguments 302 info -- as returned by info() 303 """ 304 try: 305 os.unlink(info.key.archive_path) 306 except OSError: 307 pass
308
309 - def remove_meta_data(self, info):
310 """ 311 Remove the resource at 'info.key' 312 313 Keyword arguments 314 info -- as returned by info() 315 """ 316 self.remove_content(info)
317
318 - def persist(self, *args, **kwargs):
319 """ 320 Does nothing in the filesystem storage 321 """ 322 pass
323
324 - def exists(self, info):
325 """ 326 Returns True if the resource at 'info' exists. False otherwise. 327 328 If info.key.archive_sub_path is set it will lookup in the 329 archive if it exists. 330 331 Keyword arguments 332 info -- as returned by info() 333 """ 334 path = info.key.archive_path 335 if isinstance(path, unicode): 336 path = path.encode(self.encoding) 337 338 if not (os.path.exists(path) and tarfile.is_tarfile(path)): 339 return False 340 341 if info.key.archive_sub_path: 342 sub_path = info.key.archive_sub_path 343 if isinstance(sub_path, unicode): 344 sub_path = sub_path.encode(self.encoding) 345 346 tar = tarfile.open(path, self.__get_file_mode()) 347 try: 348 tar.getmember(sub_path) 349 except KeyError: 350 return False 351 352 return True
353
354 - def ls(self, collection_name, ext=None):
355 """ 356 List resources with the provided extension in a collection 357 358 Note that this will open each archive within the collection that 359 matches the compression set for the storage: 360 * 'gz' will result in searching through '*.tar.gz' 361 * 'bz2' will result in searching through '*.tar.bz2' 362 * None will result in searching through '*.tar' 363 364 The speed of this method will therefore decrease with the number 365 of archives stored. 366 367 Keyword arguments 368 collection_name -- name of the directory in the working copy 369 containing all the members of a collection. Created if it does 370 not exists. 371 372 ext -- extension of resources to return 373 """ 374 collection_path = self.create_container(collection_name) 375 376 if self.compression == 'gz': 377 archives = glob('%s/*.tar.gz' % (collection_path,)) 378 elif self.compression == 'bz2': 379 archives = glob('%s/*.tar.bz2' % (collection_path,)) 380 else: 381 archives = glob('%s/*.tar' % (collection_path,)) 382 383 members = {} 384 for archive in archives: 385 basename = os.path.basename(archive) 386 tar = tarfile.open(archive, self.__get_file_mode()) 387 for tarinfo in tar: 388 ti = TarFileStoragePathInfo(archive, tarinfo.name) 389 if ext: 390 if tarinfo.name.endswith(ext): 391 members[basename] = StorageResourceInfo(basename, ti, 392 collection_name) 393 else: 394 members[basename] = StorageResourceInfo(basename, ti, 395 collection_name) 396 397 return members
398
399 - def ils(self, collection_name, ext=None):
400 """ 401 Yields resources with the provided extension in a collection 402 403 Note that this will open each archive within the collection that 404 matches the compression set for the storage: 405 * 'gz' will result in searching through '*.tar.gz' 406 * 'bz2' will result in searching through '*.tar.bz2' 407 * None will result in searching through '*.tar' 408 409 The speed of this method will therefore decrease with the number 410 of archives stored. 411 412 Keyword arguments 413 collection_name -- name of the directory in the working copy 414 containing all the members of a collection. Created if it does 415 not exists. 416 417 ext -- extension of resources to return 418 """ 419 if self.compression == 'gz': 420 archives = iglob('%s/*.tar.gz' % (collection_path,)) 421 elif self.compression == 'bz2': 422 archives = iglob('%s/*.tar.bz2' % (collection_path,)) 423 else: 424 archives = iglob('%s/*.tar' % (collection_path,)) 425 426 for archive in archives: 427 basename = os.path.basename(archive) 428 tar = tarfile.open(archive, self.__get_file_mode()) 429 for tarinfo in tar: 430 ti = TarFileStoragePathInfo(archive, tarinfo.name) 431 if ext: 432 if tarinfo.name.endswith(ext): 433 yield basename, StorageResourceInfo(basename, ti,