# Source code for wwdata.Class_LabExperimBased

# -*- coding: utf-8 -*-
"""
Class_LabExperimBased provides functionalities for data handling of data obtained in lab experiments in the field of (waste)water treatment.
Copyright (C) 2016 Chaim De Mulder

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see http://www.gnu.org/licenses/.
"""

import sys
#import os
#from os import listdir
#import pandas as pd
#import scipy as sp
#import numpy as np
#import datetime as dt
import matplotlib.pyplot as plt   #plotten in python
import warnings as wn

from wwdata.Class_HydroData import HydroData

class LabExperimBased(HydroData):
    """
    Subclass of HydroData, expanding its functionalities with specific
    functions for data gathered in lab experiments.

    Attributes
    ----------
    timedata_column : str
        name of the column containing the time data
    data_type : str
        type of the data provided
    experiment_tag : str
        A tag identifying the experiment; can be a date or a code used by
        the producer/owner of the data.
    time_unit : str
        The time unit in which the time data is given
    units : array
        The units of the variables in the columns
    delta_ph : float
        Difference between the highest and the lowest pH value; only
        available after calling check_ph.
    """

    def __init__(self, data, timedata_column='index', data_type='NAT',
                 experiment_tag='No tag given', time_unit=None):
        """
        Initialisation of a LabExperimBased object; all arguments are
        forwarded unchanged to the HydroData initialiser.
        """
        super().__init__(data, timedata_column=timedata_column,
                         data_type=data_type, experiment_tag=experiment_tag,
                         time_unit=time_unit)

    def hours(self, time_column='index'):
        """
        Calculates the hours from the relative time values and stores them
        in a new column 'h' of self.data.

        The new column is computed as ``t * 24 + t.shift(1)``, where ``t``
        holds the relative time values. Rows for which this yields NaN (at
        least the first row, which has no previous value) are set to 0.

        NOTE(review): the formula is kept exactly as in the original
        implementation; confirm that adding the previous time value is the
        intended conversion.

        Parameters
        ----------
        time_column : string
            column containing the relative time values; default to 'index',
            in which case the values of self.time are used
        """
        if time_column == 'index':
            # Local import: the module-level pandas import is commented out.
            import pandas as pd
            # Build a standalone Series instead of a temporary 'index'
            # column, so an existing column with that name is never
            # overwritten and dropped.
            rel_time = pd.Series(self.time.values, index=self.data.index)
        else:
            rel_time = self.data[time_column]

        # Assign the filled result directly; an in-place fillna on a column
        # selection does not reliably write back into the DataFrame.
        self.data['h'] = (rel_time * 24 + rel_time.shift(1)).fillna(0)

    def add_conc(self, column_name, x, y, new_name='default'):
        """
        Multiplies the values of the given column with the factors x and y
        and adds the result as a new column to the DataFrame.

        Parameters
        ----------
        column_name : str
            column with values
        x : int or float
            first multiplication factor
        y : int or float
            second multiplication factor
        new_name : str
            name of the new column, default to 'column_name + mg/L'

        Notes
        -----
        The physical meaning of x and y is not documented in the original
        code; they are simply multiplied with the column values.
        """
        if new_name == 'default':
            new_name = column_name + ' ' + 'mg/L'

        self.data[new_name] = self.data[column_name].values * x * y

    ## Instead of this function: define a dataframe/dict with conversion or
    ## concentration factors, so that you can have a function that automatically
    ## converts all parameters in the frame to concentrations

    def check_ph(self, ph_column='pH', thresh=0.4):
        """
        Calculates the maximal change in pH (maximum minus minimum), stores
        it in self.delta_ph and warns when it exceeds the threshold.

        Parameters
        ----------
        ph_column : str
            column with pH-values, default to 'pH'
        thresh : float
            threshold value for warning, default to 0.4
        """
        dph = self.data[ph_column].max() - self.data[ph_column].min()
        # Store the value in every case, so it can also be inspected after
        # a warning was issued.
        self.delta_ph = dph
        if dph > thresh:
            wn.warn('Strong change in pH during experiment!')

    def _start_end_totals(self, columns):
        """
        Returns the sums, over the given columns, of the values in the
        first row and in the last row of self.data.
        """
        # Positional access: the start/end of the experiment are the first
        # and last rows, whatever the labels of the DataFrame index are.
        start_total = sum(self.data[column].iloc[0] for column in columns)
        end_total = sum(self.data[column].iloc[-1] for column in columns)
        return start_total, end_total

    def in_out(self, columns):
        """
        Calculates the difference between the summed start values and the
        summed end values of the given columns
        (start_values-end_values)

        Parameters
        ----------
        columns : array of strings
            columns to sum the first-row and last-row values of

        Returns
        -------
        sum of the first-row values minus sum of the last-row values
        """
        start_total, end_total = self._start_end_totals(columns)
        return start_total - end_total

    def removal(self, columns):
        """
        Calculates the fraction removed over the experiment, based on the
        summed start and end values of the given columns
        (1-(end_values/start_values))

        Parameters
        ----------
        columns : array of strings
            columns to sum the first-row and last-row values of

        Returns
        -------
        1 - (sum of the last-row values / sum of the first-row values)

        Notes
        -----
        No guard is applied for a summed start value of zero.
        """
        start_total, end_total = self._start_end_totals(columns)
        return 1 - (end_total / start_total)

    def calc_slope(self, columns, time_column='h'):
        """
        Calculates the slope of the selected columns and stores it, for
        every column, in a new column named '<column> slope'.

        Each value is (previous value - current value) divided by
        (current time - previous time); the first row is NaN because it
        has no previous value.

        NOTE(review): with this definition a decreasing variable gets a
        positive value, i.e. the sign is opposite to the usual
        finite-difference slope; presumably intended as a removal rate -
        confirm.

        Parameters
        ----------
        columns : array of strings
            columns to calculate the slope for
        time_column : str
            time used for calculation; default to 'h'

        """
        # The time step does not depend on the column, so compute it once.
        time_step = self.data[time_column] - self.data[time_column].shift(1)
        for column in columns:
            decrease = self.data[column].shift(1) - self.data[column]
            self.data[column + " " + 'slope'] = decrease / time_step

    def plot(self, columns, time_column='index'):
        """
        Plots the selected columns as a function of time.

        Parameters
        ----------
        columns : array of strings
            columns to plot
        time_column : str
            column to use as x-axis; default to 'index', in which case
            self.time is used

        Returns
        -------
        fig, ax
            the created matplotlib figure and axes
        """
        fig = plt.figure(figsize=(10, 6))
        ax = fig.add_subplot(111)
        if time_column == 'index':
            x_values = self.time
        else:
            x_values = self.data[time_column]
        for column in columns:
            # Label every line; without labels ax.legend() has nothing to
            # show.
            ax.plot(x_values, self.data[column], marker='o', label=column)
        ax.legend()

        return fig, ax

#######################################

def _print_removed_output(original,new,type_):
    """
    Report on standard output how many datapoints a removal step took out.

    Three lines are printed: the original number of datapoints, the new
    number of datapoints and the difference between both, followed by the
    given type.

    Parameters
    ----------
    original : int
        original length of the dataset
    new : int
        length of the new dataset
    type_ : str
        'removed' or 'dropped'

    """
    print('Original dataset: {!s} datapoints'.format(original))
    print('New dataset: {!s} datapoints'.format(new))
    difference = original - new
    # The two spaces before the type are the trailing space of the label
    # plus the separator print() inserted between its arguments.
    print('{!s} datapoints  {!s}'.format(difference, type_))

def _log_removed_output(log_file,original,new,type_):
    """
    function writing the output of functions that remove datapoints to a log file.

    One entry, starting with a newline character, is appended to the file;
    the file is created when it does not exist yet.

    Parameters
    ----------
    log_file : str
        string containing the directory to the log file to be written out
    original : int
        original length of the dataset
    new : int
        length of the new dataset
    type_ : str
        'removed' or 'dropped'
    """
    # Build the complete entry first. The original passed type_ as a second
    # argument to str(), which str() interprets as an encoding and therefore
    # raised a TypeError on every call.
    entry = ('\nOriginal dataset: {} datapoints; new dataset: {} datapoints; '
             '{} datapoints {}').format(original, new, original - new, type_)
    # The context manager closes the file even when writing fails.
    with open(log_file, 'a') as log:
        log.write(entry)