Source code for wtools.fileio

"""This module holds several methods for standard file I/O for the data formats
that we work with regularly. Much of the functionality regarding `Grid` objects
is inherited directly by the `Grid` class.
"""

# Public API of this module: the names exported by a star-import.
__all__ = [
    'read_gslib',
    'save_gslib',
    'GridFileIO',
    'load_models',
    'save_pickle',
    'load_pickle',
]

# Human-readable title for this module.
# NOTE(review): presumably consumed by the documentation build - confirm.
__displayname__ = 'File I/O'

import pandas as pd
import numpy as np

import glob
import os
import warnings
import datetime
import json
import pickle

import discretize

from .transform import transpose
from .models import Models


def save_pickle(filename, data):
    """Pickle a data object to disk in a Python 2 AND 3 friendly manner.

    Pickle protocol 2 is requested explicitly so the resulting file does not
    depend on the default protocol of the running interpreter.

    Args:
        filename (str): path of the file to write. It is opened in binary
            write mode, so an existing file is overwritten.
        data: the object to serialize.
    """
    with open(filename, 'wb') as stream:
        pickle.Pickler(stream, protocol=2).dump(data)
def load_pickle(filename):
    """Read a pickled data object back from disk.

    Warning:
        Unpickling can execute arbitrary code. Only load files that come
        from a source you trust.

    Args:
        filename (str): path of the pickle file; opened in binary read mode.

    Return:
        The object stored in the file.
    """
    with open(filename, 'rb') as stream:
        return pickle.load(stream)
def read_gslib(filename):
    """Read a standard GSLib or GeoEAS data file into a pandas ``DataFrame``.

    The layout that is parsed is: one header line, one line holding the
    number of variables ``n``, then ``n`` lines each holding a variable name,
    followed by whitespace-separated rows of data.

    Args:
        filename (str): the string file name of the data to load. This can be
            a relative or absolute file path.

    Return:
        pandas.DataFrame: A table containing all of the data arrays. Note that
        an attribute called ``header`` is added to the data frame containing
        the string header line of the file.

    Raises:
        ValueError: if the second line of the file is not an integer.
    """
    with open(filename, 'r') as f:
        head = f.readline().strip()
        num = int(f.readline().strip())
        names = [f.readline().strip() for _ in range(num)]
        # The handle is now positioned at the first data row. A regex
        # whitespace separator is the supported replacement for the
        # deprecated ``delim_whitespace=True`` keyword.
        df = pd.read_csv(f, names=names, sep=r'\s+')
    # Stored as a plain instance attribute so ``save_gslib`` can recover it.
    df.header = head
    return df
def save_gslib(filename, dataframe, header=None):
    """Save a pandas ``DataFrame`` to a GSLib file.

    The file is written as: the header line, the number of columns, one
    column name per line, then the data rows separated by single spaces with
    floats formatted as ``%.9e``.

    Args:
        filename (str): path of the file to write (overwritten if it exists).
        dataframe (pandas.DataFrame): the table to save. The index is not
            written.
        header (str), optional: the single header line. When omitted, the
            ``header`` attribute of ``dataframe`` is used; if that is missing
            too, a warning is issued and the current date/time is used.

    Return:
        int: always ``1``.

    Raises:
        RuntimeError: if ``header`` spans more than one line.
    """
    if header is None:
        try:
            header = dataframe.header
        except AttributeError:
            warnings.warn('Header not defined. Using date')
            header = str(datetime.datetime.now())
    if '\n' in header:
        raise RuntimeError('`header` can only be 1 line.')
    # Column labels are not guaranteed to be strings (e.g. default integer
    # labels), and ``str.join`` only accepts strings.
    names = [str(name) for name in dataframe.columns]
    with open(filename, 'w') as f:
        f.write('%s\n' % header)
        f.write('%d\n' % len(names))
        f.write('\n'.join(names))
        f.write('\n')
        # The column names were already written above, one per line.
        dataframe.to_csv(f, sep=' ', header=False, index=False,
                         float_format='%.9e')
    return 1
class GridFileIO(object):
    """File I/O methods for the :class:`~wtools.mesh.Grid` class.

    This class is inherited by the :class:`~wtools.mesh.Grid` class and all
    these methods should be called from :class:`~wtools.mesh.Grid`. For
    example, if you have a file to read:

    Example:
        >>> import wtools
        >>> grid = wtools.Grid.read_sgems_grid('path/to/data/file.sgems')
        >>> grid.validate()
        True

    """

    @classmethod
    def table_to_grid(Grid, df, shp, origin=[0.0, 0.0, 0.0],
                      spacing=[1.0, 1.0, 1.0], order='F'):
        """Convert a pandas ``DataFrame`` table to a ``Grid`` object.

        Every column of ``df`` is reshaped to ``shp`` and stored as one model
        on the grid, keyed by the column name.

        Args:
            df (pandas.DataFrame): the table of data arrays, one per column.
            shp (tuple(int)): length 3 tuple of integer sizes for the data
                grid dimensions.
            origin (iter(float)): the southwest-bottom corner of the grid.
            spacing (iter(float)): the cell spacings for each axial direction.
            order (``'C'``, ``'F'``, ``'A'``), optional: the reshape order.
                Defaults to Fortran ordering, which is what SGeMS files use.

        Return:
            Grid: The data table loaded onto a ``Grid`` object.

        Raises:
            RuntimeError: if ``shp`` is not a length 3 list/tuple of integers.
        """
        if not isinstance(shp, (list, tuple)) or len(shp) != 3:
            raise RuntimeError('`shp` must be a length 3 tuple.')
        for i, n in enumerate(shp):
            # NumPy integers are accepted too: shapes are frequently computed
            # with NumPy and are just as valid as builtin ints.
            if not isinstance(n, (int, np.integer)):
                raise RuntimeError('`shp` index ({}) must be an integer: ({}) is invalid'.format(i, n))
        nx, ny, nz = shp
        # Honour the requested ordering instead of always using Fortran order.
        d = {k: df[k].values.reshape(shp, order=order) for k in df.keys()}
        grid = Grid(
            models=d,
            x0=origin,
            # Uniform cell widths along each axis.
            h=[
                np.full(nx, spacing[0], dtype=float),
                np.full(ny, spacing[1], dtype=float),
                np.full(nz, spacing[2], dtype=float),
            ],
        )
        grid.validate()
        return grid

    @classmethod
    def read_sgems_grid(Grid, fname, origin=[0.0, 0.0, 0.0],
                        spacing=[1.0, 1.0, 1.0]):
        """Read an SGeMS grid file onto a ``Grid`` object.

        The grid shape is defined in the file header as three integers
        separated by whitespace. Data arrays are treated as 3D and given in
        <x, y, z> indexing.

        Args:
            fname (str): the string file name of the data to load. This can
                be a relative or absolute file path.
            origin (iter(float)): the southwest-bottom corner of the grid.
            spacing (iter(float)): the cell spacings for each axial direction.

        Return:
            Grid: The SGeMS data loaded onto a ``Grid`` object.

        Raises:
            ValueError: if a header entry cannot be parsed as an integer.
            RuntimeError: if the header does not hold exactly three integers.
        """
        df = read_gslib(fname)
        shp = [int(i) for i in df.header.split()]
        # SGeMS tables are always unpacked with Fortran ordering.
        return Grid.table_to_grid(df, shp, origin=origin, spacing=spacing,
                                  order='F')

    def save_sgems(self, filename):
        """Save the grid in the SGeMS gridded data file format.

        Args:
            filename (str): path of the file to write.

        Return:
            The return value of ``save_gslib``.
        """
        df = self.to_data_frame(order='F')
        return save_gslib(filename, df)

    @classmethod
    def load_mesh(Grid, filename):
        """Open a json file and load the mesh into the ``Grid`` class.

        :param str filename: name of file to read in
        """
        with open(filename, 'r') as infile:
            jsondict = json.load(infile)
        # NOTE(review): ``trusted=True`` is forwarded to ``deserialize``;
        # only load files from sources you trust - confirm the exact
        # semantics against the ``deserialize`` implementation.
        return Grid.deserialize(jsondict, trusted=True)

    def writeUBC(self, fileName, directory='', comment_lines=''):
        """Write the mesh and all of its models through ``discretize``.

        One model file name is generated per entry of ``self.models`` as
        ``<root>_<model name>.ubc``, where ``<root>`` is ``fileName`` without
        its extension. The work is delegated to
        ``discretize.TensorMesh.writeUBC``.

        Args:
            fileName (str): name of the mesh file. ``.msh`` is appended when
                no extension is given.
            directory (str), optional: forwarded to ``discretize``.
            comment_lines (str), optional: forwarded to ``discretize``.

        Return:
            The return value of ``discretize.TensorMesh.writeUBC``.
        """
        root, ext = os.path.splitext(fileName)
        # Compare by value: identity checks against a string literal are
        # implementation-dependent.
        if not ext:
            ext = '.msh'
            fileName = root + ext
        # Use the splitext root rather than ``str.replace`` so that only the
        # trailing extension is stripped from the model file names.
        d = {'%s_%s.ubc' % (root, k): v for k, v in self.models.items()}
        return discretize.TensorMesh.writeUBC(
            self, fileName, models=d, directory=directory,
            comment_lines=comment_lines)
def load_models(filename):
    """Open a json file and load the models into the target class.

    As long as there are no namespace conflicts, the target __class__
    will be stored on the properties.HasProperties registry and may be
    fetched from there.

    :param str filename: name of file to read in
    """
    with open(filename, 'r') as handle:
        payload = json.load(handle)
    return Models.deserialize(payload, trusted=True)