# Source code for wwdata.Class_LabSensorBased

# -*- coding: utf-8 -*-
"""
    Class_LabSensorBased provides functionalities for data handling of data obtained in lab experiments with online sensors in the field of (waste)water treatment.
    Copyright (C) 2016 Chaim De Mulder

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see http://www.gnu.org/licenses/.
"""
import sys
import os
from os import listdir
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt   #plotten in python
import datetime as dt

from wwdata.Class_HydroData import HydroData
from wwdata.data_reading_functions import *
from wwdata.time_conversion_functions import *

class LabSensorBased(HydroData):
    """
    Subclass of HydroData, expanding its functionalities with specific
    functions for data gathered in lab experiments.

    Attributes
    ----------
    timedata_column : str
        name of the column containing the time data
    data_type : str
        type of data provided
    experiment_tag : str
        A tag identifying the experiment; can be a date or a code used by
        the producer/owner of the data.
    time_unit : str
        The time unit in which the time data is given
    units : array
        The units of the variables in the columns
    """

    def __init__(self, data, experiment_tag='None', timedata_column=None):
        """
        Initialise a LabSensorBased object by delegating to HydroData.

        Parameters
        ----------
        data : pd.DataFrame
            the data to wrap (the methods of this class use it as a pandas
            DataFrame through ``self.data``)
        experiment_tag : str
            tag identifying the experiment; forwarded to HydroData
        timedata_column : str or None
            name of the column containing the time data. When None, the
            argument is not forwarded, so HydroData applies its own default.
        """
        # NOTE(review): the None branch assumes HydroData.__init__ has a
        # default for its time-data argument -- confirm against HydroData.
        if timedata_column is None:
            HydroData.__init__(self, data, experiment_tag=experiment_tag)
        else:
            HydroData.__init__(self, data, timedata_column,
                               experiment_tag=experiment_tag)

    def _new_like_self(self, data):
        """
        Build a new object of the same class around ``data``.

        NOTE(review): the experiment tag is read from ``self.tag``, which is
        presumably where HydroData stores it -- confirm. When that attribute
        is absent, the constructor default ('None') is used instead.
        """
        return self.__class__(data, experiment_tag=getattr(self, 'tag', 'None'))

    def drop_peaks(self, data_name, cutoff, inplace=True, log_file=None):
        """
        Filters out the peaks larger than a cut-off value in a dataseries

        Parameters
        ----------
        data_name : str
            the name of the column to use for the removal of peak values
        cutoff : int
            cut off value to use for the removing of peaks; rows where the
            absolute value of ``data_name`` is larger than this cut off are
            removed from the data
        inplace : bool
            indicates whether a new dataframe is created and returned or whether
            the operations are executed on the existing dataframe (nothing is
            returned)
        log_file : str
            string containing the directory to a log file to be written out
            when using this function; when None, the summary is printed

        Returns
        -------
        LabSensorBased object (if inplace=False)
            a new object holding the data from which the peak rows are removed
        None (if inplace=True)

        Raises
        ------
        TypeError
            if ``log_file`` is neither None nor a string
        """
        # Validate first, so that a wrong argument can never leave the object
        # with its data already modified.
        if log_file is not None and not isinstance(log_file, str):
            raise TypeError('Please provide the location of the log file as '
                            'a string type, or leave the argument if no log '
                            'file is needed.')

        original = len(self.data)
        peak_rows = self.data[abs(self.data[data_name]) > cutoff].index

        if inplace:
            self.data.drop(peak_rows, inplace=True)
            self.data.reset_index(drop=True, inplace=True)
            filtered = self.data
        else:
            # DataFrame.drop returns a new frame; self.data stays untouched.
            filtered = self.data.drop(peak_rows)
            filtered.reset_index(drop=True, inplace=True)

        new = len(filtered)
        if log_file is None:
            _print_removed_output(original, new, 'removed')
        else:
            _log_removed_output(log_file, original, new, 'removed')

        if inplace:
            return None
        return self._new_like_self(filtered)

    def _select_slope(self, ydata, down=True, limit=0):
        # TODO: to be adjusted based on the "all functions" file.
        """
        Selects down- or upward sloping data from a given dataseries, based on
        the maximum in the dataseries. This requires only one maximum to be
        present in the dataset.

        The selection is applied to ``self.data`` itself (which is replaced
        by the selected rows, re-indexed from 0) and a new object wrapping
        that same data is returned as well.

        Parameters
        ----------
        ydata : str
            name of the column containing the data for which slopes, either up
            or down, need to be selected
        down : bool
            if True, the rows from the maximum onwards are kept (downward
            slope); if False, the rows before the maximum (upward slope)
        limit : int
            currently unused

        Returns
        -------
        LabSensorBased object:
            an object holding the data from which the non-down or -upward
            sloping data are dropped
        None
            if no maximum can be determined (a message is printed)
        """
        try:
            # Work with the 0-based position of the maximum rather than its
            # index label, so slicing and counting stay correct when the
            # index is not the default 0..n-1 range.
            peak_position = int(
                self.data[ydata].reset_index(drop=True).idxmax())
        except ValueError:
            # Empty column, or no valid (non-NaN) value to take a maximum of.
            print('Not enough datapoints left for selection')
            return None

        total = len(self.data)
        if down:
            print('Selecting downward slope:', peak_position,
                  'datapoints dropped,', total - peak_position,
                  'datapoints left.')
            selection = self.data.iloc[peak_position:]
        else:
            print('Selecting upward slope:', total - peak_position,
                  'datapoints dropped,', peak_position, 'datapoints left.')
            selection = self.data.iloc[:peak_position]

        self.data = selection.reset_index(drop=True)
        return self._new_like_self(self.data)

    #    elif based_on_max == False:
    #        drop_index = dataframe[ydata].idxmin()
    #        if down == True:
    #            try:
    #                print 'Selecting downward slope:',drop_index+sum(bounds),\
    #                'datapoints dropped,',len(dataframe)-drop_index-sum(bounds),\
    #                'datapoints left.'
    #
    #                dataframe = dataframe[bounds[0]:drop_index-bounds[1]]
    #                dataframe.reset_index(drop=True,inplace=True)
    #                return dataframe
    #            except IndexError:
    #                print 'Not enough datapoints left for selection'
    #
    #        elif down == False:
    #            try:
    #                print 'Selecting upward slope:',len(dataframe)-drop_index+sum(bounds),\
    #                'datapoints dropped,',drop_index-sum(bounds),'datapoints left.'
    #
    #                dataframe = dataframe[drop_index+bounds[0]:-bounds[1]]
    #                dataframe.reset_index(drop=True,inplace=True)
    #                return dataframe
    #            except IndexError:
    #                print 'Not enough datapoints left for selection'
    #




##############################
###   NON-CLASS FUNCTIONS  ###
##############################

def _print_removed_output(original, new, type_='removed'):
    """
    function printing the output of functions that remove datapoints.

    Parameters
    ----------
    original : int
        original length of the dataset
    new : int
        length of the new dataset
    type_ : str
        'removed' or 'dropped'; the word printed after the number of
        datapoints taken out. Optional, so that calls passing only the two
        lengths work; defaults to 'removed'.

    """
    print('Original dataset:', original, 'datapoints')
    print('New dataset:', new, 'datapoints')
    print(original - new, 'datapoints ', type_)

def _log_removed_output(log_file, original, new, type_='removed'):
    """
    function writing the output of functions that remove datapoints to a log file.

    The message is appended to the file; the file is created if it does not
    exist yet.

    Parameters
    ----------
    log_file : str
        string containing the directory to the log file to be written out
    original : int
        original length of the dataset
    new : int
        length of the new dataset
    type_ : str
        'removed' or 'dropped'; the word written after the number of
        datapoints taken out. Optional, so that calls passing only the file
        and the two lengths work; defaults to 'removed'.
    """
    message = ('\nOriginal dataset: ' + str(original) +
               ' datapoints; new dataset: ' + str(new) +
               ' datapoints; ' + str(original - new) +
               ' datapoints ' + str(type_))
    # The with-statement closes the file even when the write fails.
    with open(log_file, 'a') as handle:
        handle.write(message)