TableData.st
author Claus Gittinger <cg@exept.de>
Tue, 25 Jun 2019 14:28:51 +0200
changeset 5050 44fa8672d102
parent 4973 084da8f61316
child 5232 a99483a7340f
permissions -rw-r--r--
#DOCUMENTATION by cg class: SharedQueue comment/format in: #next #nextWithTimeout:

"{ Encoding: utf8 }"

"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

"TableData keeps tabular data: the rows (rowData), one name per column (columnNames),
 one type per column (columnTypes) and a name for the whole table (tableName)."
Object subclass:#TableData
	instanceVariableNames:'rowData columnNames columnTypes tableName'
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Sequenceable-Table'
!

!TableData class methodsFor:'documentation'!

documentation
"
    Unfinished - ongoing work to support some algorithms on table data
    (such as the contents of CSV files):

        - snippets to read and process tabular (CSV) data
        - snippets to generate a plot.

    The method category 'analysis - DWD' (Deutscher Wetterdienst / German weather service)
    contains specific code to deal with historic weather data.

    [author:]
        Claus Gittinger

    [instance variables:]
        rowData                 the actual data (a collection of rows)
        columnNames             name per column
        columnTypes             type per column (default is String)
        tableName               name - only used for labeling graphs

    [class variables:]

    [see also:]

"
! !

!TableData class methodsFor:'instance creation'!

fromFile:filename
    "create a new instance and let it read the CSV file named filename.
     Answers whatever readCSV: returns."

    |newTable|

    newTable := self new.
    ^ newTable readCSV:filename

    "
     self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'
    "

    "Created: / 06-01-2019 / 13:28:53 / Claus Gittinger"
    "Modified: / 09-01-2019 / 12:08:02 / Claus Gittinger"
!

rows:rowData columnNames:names
    "create a new table from a collection of rows and a collection of column names.
     The new table gets no table name."

    |newTable|

    newTable := self new.
    ^ newTable rows:rowData columnNames:names

    "Created: / 06-01-2019 / 13:25:37 / Claus Gittinger"
!

rows:rowData columnNames:names tableName:tableName
    "create a new table from a collection of rows, a collection of column names
     and a name for the table"

    |newTable|

    newTable := self new.
    ^ newTable rows:rowData columnNames:names tableName:tableName

    "Created: / 06-01-2019 / 19:24:47 / Claus Gittinger"
! !

!TableData methodsFor:'accessing'!

getColumn:index
    "answer one column: the value found at position index of every row,
     collected in row order"

    ^ rowData collect:[:eachRow | eachRow at:index]

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumn:1.
    "

    "Created: / 06-01-2019 / 19:04:08 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:29:54 / Claus Gittinger"
!

getColumnNamed:name
    "answer one column, selected by its name: the value of that column in every row.
     The name is looked up with indexOfColumnNamed:"

    |colIdx|

    colIdx := self indexOfColumnNamed:name.
    ^ self getColumn:colIdx

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumnNamed:'TXK'.
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:04:24 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:30:00 / Claus Gittinger"
!

getColumns:indexCollection
    "select multiple columns by index.
     Answers one Array per row, holding that row's values at the given column
     indices, in the order given by indexCollection."

    |pickColumns|

    pickColumns := [:eachRow |
                        indexCollection collect:[:colIdx | eachRow at:colIdx] as:Array
                   ].
    ^ rowData collect:pickColumns

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumns:#(1 2 3).
    "

    "Created: / 06-01-2019 / 19:04:47 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:30:30 / Claus Gittinger"
!

getColumnsNamed:names
    "select multiple columns by name.
     Every name is translated to its column index (see indexOfColumnNamed:),
     then the result of getColumns: for those indices is answered."

    ^ self getColumns:(names collect:[:eachName | self indexOfColumnNamed:eachName])

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:04:56 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:30:41 / Claus Gittinger"
!

tableName:aString
    "define the name of this table.
     plot:file: places it into the main title of the generated plot."

    tableName := aString

    "Modified (comment): / 30-05-2019 / 11:30:52 / Claus Gittinger"
!

tableNamePrefix:aString
    "prepend a prefix to the table's name.
     A table without a name (tableName is nil, as after rows:columnNames:)
     is treated like one with an empty name, so the prefix becomes the name."

    tableName := aString , (tableName ? '').

    "Created: / 06-01-2019 / 21:13:12 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:31:08 / Claus Gittinger"
!

tableNameSuffix:aString
    "append a suffix to the table's name.
     A table without a name (tableName is nil, as after rows:columnNames:)
     is treated like one with an empty name, so the suffix becomes the name."

    tableName := (tableName ? '') , aString.

    "Created: / 06-01-2019 / 21:13:01 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:31:17 / Claus Gittinger"
! !

!TableData methodsFor:'analysis'!

addBincoSlidingMean3ForColumnNamed:colName
    "append a column holding the 3-point weighted sliding mean of the column named
     colName: (previous + 2*current + next) / 4, i.e. weights 1/4, 1/2, 1/4.
     The first and the last row have only one neighbour; they take over the mean
     of the row next to them.
     The new column is named 'Sliding_Binco_Mean_of_<colName>'.
     Modifies the receiver and returns it.
     This is a tuned variant of addBincoSlidingMean:3 forColumnNamed:colName"

    |srcColIdx newColIdx rowCount|

    srcColIdx := self indexOfColumnNamed:colName.
    rowCount := rowData size.

    "/ inside the block, rowData still refers to the old rows
    "/ (the assignment happens after the collect has finished)
    rowData := rowData collectWithIndex:[:eachRow :rowIdx |
        |weighted|

        "/ border rows are left nil here and patched below
        (rowIdx > 1 and:[rowIdx < rowCount]) ifTrue:[
            |before center after|

            before := (rowData at:rowIdx - 1) at:srcColIdx.
            center := (rowData at:rowIdx) at:srcColIdx.
            after := (rowData at:rowIdx + 1) at:srcColIdx.
            weighted := (before + (center * 2) + after) / 4.
        ].
        eachRow copyWith:weighted
    ].

    "/ the new value sits behind the existing columns
    newColIdx := columnNames size + 1.
    (rowData at:1) at:newColIdx put:((rowData at:2) at:newColIdx).
    (rowData at:rowCount) at:newColIdx put:((rowData at:rowCount - 1) at:newColIdx).

    columnNames := columnNames copyWith:('Sliding_Binco_Mean_of_',colName).
    ^ self

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                addBincoSlidingMean3ForColumnNamed:'Mean_Per_Year_TXK')
                    tableName:'Feldberg/Schwarzwald';
                    plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Binco_Mean_of_Mean_Per_Year_TXK' .  }}.
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 08-01-2019 / 00:10:16 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:32:34 / Claus Gittinger"
!

addBincoSlidingMean:n forColumnNamed:colName
    "append a column with a symmetric weighted sliding mean over a window of n rows
     (n must be odd), centered on each row.
     The center value has weight 1, a value delta rows away has weight 1/(2^delta),
     and the weighted sum is divided by the sum of all weights
     (for n = 3 this results in 1/4, 1/2, 1/4).
     The first and last n//2 rows have no complete window; they take over the mean
     of the nearest row which has one.
     If the table has fewer than n rows, no window fits at all and the new column
     is nil in every row.
     The new column is named 'Sliding_Binco_Mean_of_<colName>'.
     Modifies the receiver and returns it."

    |nH valColIdx rowCount firstIndex lastIndex meanColIdx divisors wAll firstMean lastMean|

    self assert:(n odd).
    nH := n // 2.

    valColIdx := self indexOfColumnNamed:colName.

    "/ firstIndex..lastIndex are the rows for which a complete window exists
    rowCount := rowData size.
    firstIndex := 1+nH. lastIndex := rowCount-nH.

    "/ the divisor per distance (2, 4, 8, ...) is loop invariant; compute it once
    divisors := (1 to:nH) collect:[:delta | 2 raisedTo:delta].

    "/ sum of all weights: every distance occurs twice (left and right), the center once
    wAll := 0.
    divisors do:[:divisor |
        wAll := wAll + (1 / divisor).
        wAll := wAll + (1 / divisor).
    ].
    wAll := wAll + 1.

    "/ inside the block, rowData still refers to the old rows
    rowData := rowData collectWithIndex:[:row :idx|
        |slidingSum|

        ((idx >= firstIndex) and:[idx <= lastIndex]) ifTrue:[
            slidingSum := 0.
            "/ ... /8 /4 /2 /1 /2 /4 /8 ...
            1 to:nH do:[:delta |
                |divisor|

                divisor := divisors at:delta.
                slidingSum := slidingSum + (((rowData at:idx-delta) at:valColIdx) / divisor).
                slidingSum := slidingSum + (((rowData at:idx+delta) at:valColIdx) / divisor).
            ].
            slidingSum := slidingSum + (row at:valColIdx).
            slidingSum := slidingSum / wAll.
        ].
        row copyWith:slidingSum.
    ].

    meanColIdx := columnNames size+1.
    firstIndex <= lastIndex ifTrue:[
        "/ fill the border rows from the first / last row with a complete window.
        "/ (the trailing fill formerly read row (size-nH-1), one row before the
        "/  last computed one, unlike addBincoSlidingMean3ForColumnNamed:)
        firstMean := (rowData at:firstIndex) at:meanColIdx.
        lastMean := (rowData at:lastIndex) at:meanColIdx.
        1 to:nH do:[:delta |
            (rowData at:delta) at:meanColIdx put:firstMean.
            (rowData at:rowCount+1-delta) at:meanColIdx put:lastMean.
        ].
    ].
    columnNames := columnNames copyWith:('Sliding_Binco_Mean_of_',colName).
    ^ self

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                addBincoSlidingMean:5 forColumnNamed:'Mean_Per_Year_TXK')
                    tableName:'Feldberg/Schwarzwald';
                    plot:{ #x -> 'MESS_DATUM' . 
                           #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Binco_Mean_of_Mean_Per_Year_TXK' .  }}.
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 07-01-2019 / 15:37:16 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:22:31 / Claus Gittinger"
!

addSlidingMean:n forColumnNamed:colName
    "append a column with the mean over a sliding window of n values of the column
     named colName.
     The window is seeded with the values of the first n rows; then, for every row
     in turn, its oldest entry is replaced by that row's value.
     As a consequence the first n rows all carry the mean of the first n values,
     and every later row carries the mean of the n most recent values (itself included).
     Such a trailing mean may introduce lag (phase shift), which the binco variants avoid.
     The new column is named 'Sliding_Mean_of_<colName>'.
     Modifies the receiver and returns it."

    |window windowSum srcColIdx|

    srcColIdx := self indexOfColumnNamed:colName.

    "/ seed the window with the values of the first n rows
    window := (1 to:n) collect:[:rowIdx | (rowData at:rowIdx) at:srcColIdx] as:OrderedCollection.
    windowSum := window sum.

    rowData := rowData collect:[:eachRow |
        |newest|

        "/ keep the sum up to date incrementally: oldest value out, this row's value in
        windowSum := windowSum - window removeFirst.
        newest := eachRow at:srcColIdx.
        window add:newest.
        windowSum := windowSum + newest.
        eachRow copyWith:(windowSum / n)
    ].
    columnNames := columnNames copyWith:('Sliding_Mean_of_',colName).
    ^ self

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                addSlidingMean:11 forColumnNamed:'Mean_Per_Year_TXK')
                    tableName:'Feldberg/Schwarzwald';
                    plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Mean_of_Mean_Per_Year_TXK' .  }}.
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 20:05:42 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:33:16 / Claus Gittinger"
!

extractRowsWhere:filterBlock
    "answer a new table with my column names, containing only those rows
     for which filterBlock (a one-argument block, called with the row)
     evaluates to true.
     The new table is created without a table name."

    ^ self class
        rows:(rowData select:filterBlock)
        columnNames:columnNames

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractRowsWhere:[:row | (row at:1) startsWith:'1945'])
                    plot
        
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractRowsWhere:[:row | ((row at:1) from:5 to:6) = '01'])
                    plot
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 09-01-2019 / 12:35:44 / Claus Gittinger"
! !

!TableData methodsFor:'analysis - DWD'!

extractMonth:monthIndex
    "answer a new table containing only the rows of the given month (1..12).
     This is specific to DWD data: the date is taken from the column named
     'MESS_DATUM' and parsed with the format '%4y%2m%2d'.
     For other columns or date formats, see extractMonth:columnName:format:"

    |dwdDateColumn dwdDateFormat|

    dwdDateColumn := 'MESS_DATUM'.
    dwdDateFormat := '%4y%2m%2d'.
    ^ self extractMonth:monthIndex columnName:dwdDateColumn format:dwdDateFormat

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:00:52 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 18:30:09 / Claus Gittinger"
!

extractMonth:monthIndex columnName:monthColumnName
    "answer a new table containing only the rows of the given month (1..12).
     The date is taken from the column named monthColumnName and parsed with
     the format '%4y%2m%2d' (as found in DWD data).
     For other date formats, see extractMonth:columnName:format:"

    |dwdDateFormat|

    dwdDateFormat := '%4y%2m%2d'.
    ^ self extractMonth:monthIndex columnName:monthColumnName format:dwdDateFormat

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 09-01-2019 / 14:42:05 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 18:31:31 / Claus Gittinger"
!

extractMonth:monthIndex columnName:monthColumnName format:dateFormat
    "answer a new table containing only the rows of the given month (1..12).
     The date of a row is read from the column named monthColumnName,
     using Date readFrom:format: with dateFormat.
     The new table is named '<name of month> Mean'."

    |dateColIdx isInMonth monthTable|

    dateColIdx := self indexOfColumnNamed:monthColumnName.
    isInMonth := [:eachRow |
                    |parsedDate|

                    parsedDate := Date readFrom:(eachRow at:dateColIdx) format:dateFormat.
                    parsedDate month = monthIndex
                 ].
    monthTable := self extractRowsWhere:isInMonth.
    ^ monthTable tableName:(Date nameOfMonth:monthIndex),' Mean'

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:3 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot

     |table|
     table := (self new 
            readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt').
     1 to:12 do:[:month |        
         ((table
                meanPerMonthOfColumnNamed:'TXK')
                    extractMonth:month columnName:'MESS_DATUM' format:'%4y%2m')
                        tableNameSuffix:'-Echterdingen';
                        plot
     ]
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 09-01-2019 / 18:27:28 / Claus Gittinger"
!

extractMonth:month day:day
    "answer a new table containing only the rows of the given day in the given month
     (of any year).
     The date is taken from the column named 'MESS_DATUM' and parsed with the
     format '%4y%2m%2d'; see extractMonth:day:columnName:format: for other layouts."

    |dwdDateColumn dwdDateFormat|

    dwdDateColumn := 'MESS_DATUM'.
    dwdDateFormat := '%4y%2m%2d'.
    ^ self extractMonth:month day:day columnName:dwdDateColumn format:dwdDateFormat

    "
     ((self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        extractMonth:1 day:1)
            tableName:'1st Januar, Feldberg';
            plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:15:46 / Claus Gittinger"
    "Modified (comment): / 10-01-2019 / 16:20:48 / Claus Gittinger"
!

extractMonth:month day:day columnName:monthColumnName format:dateFormat
    "answer a new table (same column names, no table name) containing only the rows
     whose date falls on the given day of the given month.
     The date of a row is read from the column named monthColumnName,
     using Date readFrom:format: with dateFormat."

    |dateColIdx matchingRows|

    dateColIdx := self indexOfColumnNamed:monthColumnName.
    matchingRows := rowData select:[:eachRow |
        |parsedDate|

        parsedDate := Date readFrom:(eachRow at:dateColIdx) format:dateFormat.
        parsedDate month = month and:[parsedDate day = day]
    ].
    ^ self class rows:matchingRows columnNames:columnNames

    "
     ((self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        extractMonth:1 day:1 columnName:'MESS_DATUM' format:'%4y%2m%2d')
            plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 10-01-2019 / 16:14:29 / Claus Gittinger"
!

meanPerMonthOfColumnNamed:colName
    "answer a new table containing the arithmetic mean per month of a column.
     The date is taken from the column named 'MESS_DATUM'; its first 6 characters
     (yyyymm of a yyyymmdd string) identify the month. Consecutive rows with the same
     yyyymm form one group, so the rows are expected to be ordered by date.
     The values of the column named colName are converted with Number readFrom:.
     The result has two columns: 'MESS_DATUM' (the yyyymm string) and
     'Mean per month of <colName>', and one row per group - including the last one.
     An empty table gives an empty result."

    |outRows valColIdx dateColIdx currentYearAndMonth sum countDaysPerMonth|

    dateColIdx := self indexOfColumnNamed:'MESS_DATUM'.
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    currentYearAndMonth := nil.
    sum := countDaysPerMonth := 0.
    rowData do:[:row |
        |rowsYearAndMonth|

        "/ yyyymmdd
        rowsYearAndMonth := (row at:dateColIdx) copyTo:6.
        rowsYearAndMonth = currentYearAndMonth ifFalse:[
            "/ a new month begins: emit the finished one (if there is any) ...
            countDaysPerMonth > 0 ifTrue:[
                outRows add:{ currentYearAndMonth . (sum / countDaysPerMonth) }.
            ].
            currentYearAndMonth := rowsYearAndMonth.
            sum := countDaysPerMonth := 0.
        ].
        "/ ... and always account for the current row, also for the first one of a month
        sum := sum + (Number readFrom:(row at:valColIdx)).
        countDaysPerMonth := countDaysPerMonth + 1.
    ].
    "/ the last month is not followed by a change of month; emit it here
    countDaysPerMonth > 0 ifTrue:[
        outRows add:{ currentYearAndMonth . (sum / countDaysPerMonth) }.
    ].
    ^ self class 
        rows:outRows columnNames:{ 'MESS_DATUM' . 'Mean per month of ',colName }
        tableName:('Mean Per Month of ',colName)

    "
     ((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                plot
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:06:36 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:24:34 / Claus Gittinger"
!

meanPerYearOfColumnNamed:colName
    "answer a new table containing the arithmetic mean per year of a column.
     The date is taken from the column named 'MESS_DATUM'; its first 4 characters
     (yyyy of a yyyymmdd string) identify the year. Consecutive rows with the same
     yyyy form one group, so the rows are expected to be ordered by date.
     The values of the column named colName are converted with Number readFrom:.
     The result has two columns: 'MESS_DATUM' (the yyyy string) and
     'Mean_Per_Year_<colName>', and one row per group - including the last one.
     An empty table gives an empty result."

    |outRows valColIdx dateColIdx currentYear sum countDaysPerYear|

    dateColIdx := self indexOfColumnNamed:'MESS_DATUM'.
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    currentYear := nil.
    sum := countDaysPerYear := 0.
    rowData do:[:row |
        |rowsYear|

        "/ yyyymmdd
        rowsYear := (row at:dateColIdx) copyTo:4.
        rowsYear = currentYear ifFalse:[
            "/ a new year begins: emit the finished one (if there is any) ...
            countDaysPerYear > 0 ifTrue:[
                outRows add:{ currentYear . (sum / countDaysPerYear) }.
            ].
            currentYear := rowsYear.
            sum := countDaysPerYear := 0.
        ].
        "/ ... and always account for the current row, also for the first one of a year
        sum := sum + (Number readFrom:(row at:valColIdx)).
        countDaysPerYear := countDaysPerYear + 1.
    ].
    "/ the last year is not followed by a change of year; emit it here
    countDaysPerYear > 0 ifTrue:[
        outRows add:{ currentYear . (sum / countDaysPerYear) }.
    ].
    ^ self class 
        rows:outRows columnNames:{ 'MESS_DATUM' . 'Mean_Per_Year_',colName }
        tableName:('Mean Per Year of ',colName)

    "
     ((self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                plot
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:40:34 / Claus Gittinger"
    "Modified: / 06-01-2019 / 22:34:06 / Claus Gittinger"
! !

!TableData methodsFor:'helpers'!

indexOfColumnNamed:name
    "answer the index of the column with the given name;
     report an error ('no such column') if there is none"

    |complain|

    complain := [self error:'no such column'].
    ^ columnNames indexOf:name ifAbsent:complain

    "
     self new 
        read:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        indexOfColumnNamed:'MESS_DATUM'.
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:20:47 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:33:23 / Claus Gittinger"
! !

!TableData methodsFor:'instance creation'!

rows:rowDataArg columnNames:columnNamesArg
    "set my rows and my column names; the table name is left as it is"

    columnNames := columnNamesArg.
    rowData := rowDataArg.

    "Created: / 06-01-2019 / 13:26:01 / Claus Gittinger"
!

rows:rowDataArg columnNames:columnNamesArg tableName:tableNameArg
    "set my rows, my column names and my table name"

    tableName := tableNameArg.
    columnNames := columnNamesArg.
    rowData := rowDataArg.

    "Created: / 06-01-2019 / 19:25:10 / Claus Gittinger"
! !

!TableData methodsFor:'plotting'!

plot
    "plot the whole table without a plot specification (see plot:)"

    |noSpec|

    noSpec := nil.
    self plot:noSpec

    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     tmax plot.
    "

    "Created: / 06-01-2019 / 18:47:27 / Claus Gittinger"
    "Modified: / 06-01-2019 / 21:32:22 / Claus Gittinger"
!

plot:optionalSpec
    "write myself as CSV into a temporary file and plot that file via plot:file:.
     optionalSpec is passed on unchanged: either nil, or a dictionary / collection of
     associations with the keys #x (a column name) and #y (a collection of column names).
     The temporary file is removed afterwards, also if writing or plotting fails."

    |tmpFile|

    [
        tmpFile := Filename newTemporary.
        tmpFile writingFileDo:[:s |
            self writeCSVToStream:s.
        ].
        "/ FileBrowserV2 openOn:tmpFile.
        self plot:optionalSpec file:tmpFile.
    ] ensure:[
        "/ only remove what has actually been created; otherwise the cleanup itself
        "/ would raise an error and hide the one which brought us here
        (tmpFile notNil and:[tmpFile exists]) ifTrue:[
            tmpFile delete
        ].
    ].

    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }} .
    "

    "Created: / 06-01-2019 / 21:32:09 / Claus Gittinger"
    "Modified: / 09-01-2019 / 18:40:16 / Claus Gittinger"
!

plot:optionalSpec file:fileName
    "generate an R script which plots the CSV file fileName (read with
     a header line and ';' as separator) into a PNG file, run it with the
     external command 'r', and open an inspector on the resulting image.

     optionalSpec is either nil (the whole data frame is plotted), or a dictionary /
     collection of associations with the keys
        #x      the name of the column for the x axis
        #y      a collection of column names; the first is drawn with plot(),
                the remaining ones are added with lines().
     My tableName (or an empty string, if there is none) becomes the plot's main title.

     NOTE(review): the image is always written to the fixed path /tmp/plot3.png,
     and the temporary script file is not removed here - confirm whether that is intended."

    |script scriptFile outFile width height spec xCol yCols|

    scriptFile := Filename newTemporary .
    outFile := '/tmp/plot3.png'.
    "/ start with an empty output file
    outFile asFilename contents:''.
    
    "/ size of the generated PNG
    width := 1200.
    height := 400.

    optionalSpec notNil ifTrue:[
        "/ accept a dictionary as well as a collection of associations
        spec := optionalSpec isDictionary 
                    ifTrue:[optionalSpec] 
                    ifFalse:[Dictionary withAssociations:optionalSpec].
        
        yCols := spec at:#y.
        xCol := spec at:#x.
        "/ the column names are pasted into the script text first;
        "/ %1..%5 are filled in by the trailing bindWith:
        "/ (output file, width, height, csv path, title).
        "/ NOTE(review): dev.off() is commented out in this script variant
        script := '
png(filename="%1", height=%3, width=%2)
Data <- read.csv(file="%4", header=TRUE, sep=";")
',xCol,' <- Data$',xCol,'
',((yCols collect:[:col | col,' <- Data$',col]) asStringWith:Character cr),
"/'plot(',xCol,', ' 
"/       ,((yCols collect:[:col | col,', ']) asStringWith:''),
"/' type="l", main = "%5")'
'
',('plot(%1, %2, type="b", col=2, main = "%%5")' bindWith:xCol with:yCols first),'
',((yCols from:2 collect:[:ycol | 'lines(%1, %2, type="l", col=3)' bindWith:xCol with:ycol]) asStringWith:Character cr)
,'
# dev.off()
'
            bindWith:outFile 
            with:width 
            with:height
            with:fileName asFilename pathName
            with:(tableName ? '').
        "/ self halt.
    ] ifFalse:[        
        "/ no spec: let R plot the whole data frame
        script := '
# library(ggplot2)
# library(ggfortify)

png(filename="%1", height=%3, width=%2)
Data <- read.csv(file="%4", header=TRUE, sep=";")

# theme_set(theme_classic())

# Plot 
# autoplot(Data) + 
#   labs(title="Data") + 
#   theme(plot.title = element_text(hjust=0.1))
plot(Data, type="l", main = "%5")
# plot(Data, cex = .4)
dev.off()
'
            bindWith:outFile 
            with:width 
            with:height
            with:fileName asFilename pathName
            with:(tableName ? '').
    ].
    
    scriptFile contents:script.        

    "/ run the script; the image is read from outFile right afterwards,
    "/ so execute is presumably expected to wait for the command to finish - TODO confirm
    OSProcess new 
        command:('r --silent --vanilla --slave -f %1' bindWith:scriptFile);
        execute.
    "/ self halt.
    "/ outFile asFilename exists.
    ((Smalltalk classNamed:#Image) fromFile:outFile asFilename) inspect.
    
    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     tmax tableName:'Feldberg'.
     tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}.
    "

    "Created: / 06-01-2019 / 21:32:43 / Claus Gittinger"
    "Modified: / 30-05-2019 / 11:28:42 / Claus Gittinger"
!

plotFile:fileName
    "plot the CSV file named fileName without a plot specification
     (see plot:file:)"

    |noSpec|

    noSpec := nil.
    self plot:noSpec file:fileName

    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK').
     tmax tableName:'Test'.
     tmax plot.
    "

    "Created: / 06-01-2019 / 15:59:47 / Claus Gittinger"
    "Modified: / 06-01-2019 / 21:32:59 / Claus Gittinger"
!

writeCSVToStream:aStream
    "save myself as CSV onto a stream:
     one line with the column names, followed by one line per row;
     fields are separated by a semicolon.
     Column names are written as they are, row values are written as their printString.
     A field containing a semicolon, a double quote or a line break is enclosed in
     double quotes, with every embedded double quote doubled.
     NOTE(review): the printString of a String value includes its single quotes -
     confirm that this is what the consumer of the file expects."

    |writeField|

    "/ the quoting rules are the same for header and data fields
    writeField := [:text |
        |needsQuotes|

        needsQuotes := (text includes:$;)
                            or:[(text includes:$")
                            or:[(text includes:Character nl)
                            or:[text includes:Character return]]].
        needsQuotes ifTrue:[
            aStream nextPut:$".
            text do:[:ch |
                "/ escape an embedded double quote by doubling it
                ch = $" ifTrue:[aStream nextPut:$"].
                aStream nextPut:ch.
            ].
            aStream nextPut:$".
        ] ifFalse:[
            aStream nextPutAll:text
        ].
    ].

    columnNames
        do:[:nm | writeField value:nm]
        separatedBy:[aStream nextPut:$;].
    aStream cr.
    rowData do:[:row |
        row
            do:[:col | writeField value:col printString]
            separatedBy:[aStream nextPut:$;].
        aStream cr.
    ].

    "
     |data tmax|
     
     data := self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     String streamContents:[:s | tmax writeCSVToStream:s]
    "

    "Created: / 09-01-2019 / 17:48:25 / Claus Gittinger"
! !

!TableData methodsFor:'processing'!

removeColumn:index
    "destructively remove the column at index:
     its name is dropped from my column names and its value from every row
     (names and rows are replaced by copies without that element)"

    |withoutColumn|

    withoutColumn := [:aSequence | aSequence copyWithoutIndex:index].
    columnNames := withoutColumn value:columnNames.
    rowData := rowData collect:withoutColumn.

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        removeColumn:1;
        inspect.
    "

    "Created: / 06-01-2019 / 13:05:41 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:50 / Claus Gittinger"
!

removeColumnNamed:name
    "destructively remove the column with the given name.
     The name is translated into a column index by #indexOfColumnNamed:,
     and the removal itself is done by #removeColumn:"

    |columnIndex|

    columnIndex := self indexOfColumnNamed:name.
    self removeColumn:columnIndex

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        removeColumnNamed:'STATIONS_ID';
        removeColumnNamed:'QN_3';
        removeColumnNamed:'FX';
        removeColumnNamed:'FM';
        removeColumnNamed:'RSK';
        removeColumnNamed:'RSKF';
        removeColumnNamed:'SDK';
        removeColumnNamed:'SHK_TAG';
        removeColumnNamed:'NM';
        removeColumnNamed:'VPM';
        removeColumnNamed:'PM';
        removeColumnNamed:'TMK';
        removeColumnNamed:'UPM';
        removeColumnNamed:'eor';
        inspect.
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:06:50 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:53 / Claus Gittinger"
!

withColumns:indexCollection
    "answer a new TableData instance which holds only the columns at the
     given indices, in the order given by indexCollection.
     The column names and every row of the new instance are Arrays.
     Only rows and column names are handed to the new instance;
     I am not modified."

    |pickFrom selectedNames selectedRows|

    "extracts the wanted elements from any one sequence (names or a row)"
    pickFrom := [:sequence | indexCollection collect:[:colIdx | sequence at:colIdx] as:Array].

    selectedNames := pickFrom value:columnNames.
    selectedRows := rowData collect:[:eachRow | pickFrom value:eachRow].
    ^ self class rows:selectedRows columnNames:selectedNames.

    "
     (self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        withColumns:#(1 2 3).
    "

    "Created: / 06-01-2019 / 14:35:11 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:57 / Claus Gittinger"
!

withColumnsNamed:nameCollection
    "answer a new TableData instance which holds only the named columns.
     Each name is translated into its column index by #indexOfColumnNamed:,
     then the work is handed to #withColumns:"

    |indices|

    indices := nameCollection collect:[:eachName | self indexOfColumnNamed:eachName].
    ^ self withColumns:indices

    "
     (self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        withColumnsNamed:#('MESS_DATUM' 'TXK').
    "

    "Created: / 06-01-2019 / 14:36:16 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:45 / Claus Gittinger"
!

withoutColumn:index
    "answer a new TableData instance which lacks the single column at index.
     This is the non-destructive counterpart of #removeColumn: -
     I am not modified."

    |reducedRows reducedNames|

    reducedRows := rowData collect:[:eachRow | eachRow copyWithoutIndex:index].
    reducedNames := columnNames copyWithoutIndex:index.
    ^ self class rows:reducedRows columnNames:reducedNames.

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        withoutColumn:1.
    "

    "Created: / 06-01-2019 / 13:27:11 / Claus Gittinger"
    "Modified (comment): / 10-01-2019 / 16:25:56 / Claus Gittinger"
!

withoutColumns:indexCollection
    "answer a new TableData instance which lacks the columns at the given indices.
     The indices to keep are obtained by starting with all of my column
     indices and removing those in indexCollection via #removeAll:.
     The column names and every row of the new instance are Arrays;
     I am not modified.
     NOTE(review): #removeAll: presumably reports an error for an index
     which is not (or no longer) present, e.g. one listed twice - confirm."

    |keptIndices keptNames keptRows|

    keptIndices := (1 to:columnNames size) asNewOrderedCollection.
    keptIndices removeAll:indexCollection.

    keptNames := keptIndices collect:[:colIdx | columnNames at:colIdx] as:Array.
    keptRows := rowData collect:[:eachRow | 
                    keptIndices collect:[:colIdx | eachRow at:colIdx] as:Array
                ].
    ^ self class rows:keptRows columnNames:keptNames.

    "
     (self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        withoutColumns:#(1 2 3).
    "

    "Created: / 06-01-2019 / 14:32:08 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:14:29 / Claus Gittinger"
! !

!TableData methodsFor:'reading'!

readCSV:filename
    "read tabular data from the file named filename,
     using a semicolon as the field separator.
     Delegates to #readCSV:separator: and answers whatever that answers."

    ^ self readCSV:filename separator:$;

    "
     self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        inspect.
    "

    "Created: / 09-01-2019 / 12:07:27 / Claus Gittinger"
!

readCSV:filename separator:separatorCharacter
    "read tabular data from the file named filename.
     The first line provides the column names; every following line provides
     one row, with its fields delimited by separatorCharacter.
     Leading and trailing whitespace is stripped from names and fields;
     all fields are kept as strings, and every row is stored as an Array.

     Lines which are empty or consist of whitespace only are skipped
     (such a line, e.g. at the end of the file, would otherwise produce a
     row without usable columns, which breaks later column access).
     A file without any line leaves me with no columns and no rows.

     My columnNames and rowData are replaced only after the whole file
     has been read, so a failure while reading leaves me unchanged."

    |headLine numCols dataLine ls word row rows names|

    rows := OrderedCollection new.
    names := #().
    filename asFilename readingFileDo:[:s |
        headLine := s nextLine.
        "a nil headLine is taken to mean that the file has no line at all"
        headLine notNil ifTrue:[
            "numCols is only a capacity hint for the per-row collection"
            numCols := (headLine occurrencesOf:separatorCharacter)+1.
            names := (headLine splitBy:separatorCharacter) collect:#withoutSeparators.
            [s atEnd] whileFalse:[
                dataLine := s nextLine.
                (dataLine notNil and:[dataLine withoutSeparators notEmpty]) ifTrue:[
                    ls := dataLine readStream.
                    row := OrderedCollection new:numCols.
                    [ls atEnd] whileFalse:[
                        ls skipSeparators.
                        word := (ls upTo:separatorCharacter) withoutSeparators.
                        row add:word.
                    ].
                    rows add:(row asArray).
                ].
            ].
        ].
    ].
    columnNames := names.
    rowData := rows.

    "
     self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        inspect.
    "

    "Created: / 09-01-2019 / 12:11:11 / Claus Gittinger"
!

readDemoCSV
    "read a demo data file via #readCSV:.
     The path is hard-coded and absolute (below /Users/cg), so this only
     works on a machine where that file exists."

    self readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.

    "Created: / 09-01-2019 / 12:07:52 / Claus Gittinger"
! !

!TableData class methodsFor:'documentation'!

version_CVS
    "answer the version header string of this class.
     NOTE(review): the string looks like a CVS keyword which is presumably
     expanded by the version control system on checkout - keep it unchanged."

    ^ '$Header$'
! !