TableData.st
author Claus Gittinger <cg@exept.de>
Sat, 02 May 2020 21:40:13 +0200
changeset 5476 7355a4b11cb6
parent 5457 4417adcb02b1
permissions -rw-r--r--
#FEATURE by cg class: Socket class added: #newTCPclientToHost:port:domain:domainOrder:withTimeout: changed: #newTCPclientToHost:port:domain:withTimeout:

"
 COPYRIGHT (c) 2018 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
"{ Package: 'stx:libbasic2' }"

"{ NameSpace: Smalltalk }"

"TableData keeps tabular data as a collection of rows (rowData) together with
 per-column names (columnNames), per-column types (columnTypes) and an optional
 table name (tableName); see the class-side #documentation for details."
Object subclass:#TableData
	instanceVariableNames:'rowData columnNames columnTypes tableName'
	classVariableNames:''
	poolDictionaries:''
	category:'Collections-Sequenceable-Table'
!

!TableData class methodsFor:'documentation'!

copyright
"
 COPYRIGHT (c) 2018 by eXept Software AG
              All Rights Reserved

 This software is furnished under a license and may be used
 only in accordance with the terms of that license and with the
 inclusion of the above copyright notice.   This software may not
 be provided or otherwise made available to, or used by, any
 other person.  No title to or ownership of the software is
 hereby transferred.
"
!

documentation
"   Unfinished
    Ongoing work to support some algorithms on table data (such as CSV files).
    
    Snippets to read and process tabular (CSV) data
    Snippets to generate a plot.

    The category DWD (Deutscher Wetter Dienst / German weather service)
    contains specific code to deal with historic weather data.

    Plotting requires the R language to be installed (uses the 'r' command).

    [author:]
        Claus Gittinger

    [instance variables:]
        rowData                 the actual data
        columnNames             name per column 
        columnTypes             type per column (default is String)
        tableName               name - only used for labeling graphs
        
    [class variables:]

    [see also:]

"
!

examples
"  
     self new 
        readCSV:'/Users/exept/Downloads/masie_4km_allyears_extent_sqkm.csv';
        inspect.

     ((self new 
        readCSV:'/Users/exept/Downloads/masie_4km_allyears_extent_sqkm.csv' separator:$, skip:1)
            masie_meanPerMonthOfColumnNamed:'(0) Northern_Hemisphere')
                plot

     '/tmp/xxx.csv' asFilename writingFileDo:[:s |
        ((self new 
            readCSV:'/Users/exept/Downloads/masie_4km_allyears_extent_sqkm.csv' separator:$, skip:1)
                masie_meanPerMonthOfColumnNamed:'(0) Northern_Hemisphere')
                    writeCSVToStream:s
      ]  

     |data tmax|
     data := self fromFile:'/Users/exept/Desktop/klima/monatswerte_KL_04927_17920101_19840731_hist/produkt_klima_monat_17920101_19840731_04927.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM_BEGINN' 'MO_TT').
     tmax tableName:'Stuttgart 04927'.
     tmax plot:{ #x -> 'MESS_DATUM_BEGINN' . #y -> { 'MO_TT'  }}.

     |data|
     data := self fromFile:'/Users/exept/Desktop/klima/monatswerte_KL_04927_17920101_19840731_hist/produkt_klima_monat_17920101_19840731_04927.txt'.
     data := data withColumnsNamed:#('MESS_DATUM_BEGINN' 'MO_TT').
     data := data dwd_meanPerYearOfColumnNamed:'MO_TT'.
     data tableName:'Stuttgart 04927'.
     data plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_MO_TT'  }}.

     |data|
     data := self fromFile:'/Users/exept/Desktop/klima/monatswerte_KL_05792_19000801_20181231_hist/produkt_klima_monat_19000801_20181231_05792.csv'.
     data := data withColumnsNamed:#('MESS_DATUM_BEGINN' 'MO_TT').
     data := data dwd_meanPerYearOfColumnNamed:'MO_TT'.
     data tableName:'Zugspitze 05792'.
     data plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_MO_TT'  }}.
"
! !

!TableData class methodsFor:'instance creation'!

fromFile:filename
    "create a fresh table and let it read the CSV file named filename;
     answers whatever #readCSV: answers"

    |newTable|

    newTable := self new.
    ^ newTable readCSV:filename

    "
     self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'
    "

    "Created: / 06-01-2019 / 13:28:53 / Claus Gittinger"
    "Modified: / 09-01-2019 / 12:08:02 / Claus Gittinger"
!

rows:rowData columnNames:names
    "create a new table holding the given rows and column names"

    |newTable|

    newTable := self new.
    ^ newTable rows:rowData columnNames:names

    "Created: / 06-01-2019 / 13:25:37 / Claus Gittinger"
!

rows:rowData columnNames:names tableName:tableName
    "create a new table holding the given rows and column names,
     labelled with tableName"

    |newTable|

    newTable := self new.
    ^ newTable rows:rowData columnNames:names tableName:tableName

    "Created: / 06-01-2019 / 19:24:47 / Claus Gittinger"
! !

!TableData methodsFor:'accessing'!

getColumn:index
    "answer the values found at column position index,
     one per row, in row order"

    ^ rowData collect:[:eachRow | eachRow at:index]

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumn:1.
    "

    "Created: / 06-01-2019 / 19:04:08 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:29:54 / Claus Gittinger"
!

getColumnNamed:name
    "answer the values of the column called name, one per row.
     The lookup goes through #indexOfColumnNamed:, which raises
     an error if there is no such column"

    |columnIndex|

    columnIndex := self indexOfColumnNamed:name.
    ^ self getColumn:columnIndex

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumnNamed:'TXK'.
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:04:24 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:30:00 / Claus Gittinger"
!

getColumns:indexCollection
    "answer one Array per row, each holding that row's values at the
     column positions listed in indexCollection (in the given order)"

    ^ rowData collect:[:eachRow |
        |picked|

        picked := indexCollection collect:[:colIdx | eachRow at:colIdx] as:Array.
        picked
    ]

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumns:#(1 2 3).
    "

    "Created: / 06-01-2019 / 19:04:47 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:30:30 / Claus Gittinger"
!

getColumnsNamed:names
    "answer one Array per row, each holding that row's values of the
     columns called names (in the given order).
     Every name is resolved through #indexOfColumnNamed:, which raises
     an error for an unknown column"

    ^ self getColumns:(names collect:[:eachName | self indexOfColumnNamed:eachName])

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        getColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:04:56 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:30:41 / Claus Gittinger"
!

tableName:aString
    "remember aString as the name of this table"

    tableName := aString

    "Modified (comment): / 30-05-2019 / 11:30:52 / Claus Gittinger"
!

tableNamePrefix:aString
    "prepend a prefix to the table's name.
     A table which has no name yet (tableName is nil) is treated as
     having an empty name, so the prefix alone becomes the name
     instead of raising an error on the concatenation"

    tableName := aString , (tableName ifNil:[''])

    "Created: / 06-01-2019 / 21:13:12 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:31:08 / Claus Gittinger"
!

tableNameSuffix:aString
    "append a suffix to the table's name.
     A table which has no name yet (tableName is nil) is treated as
     having an empty name, so the suffix alone becomes the name
     instead of raising an error on the concatenation"

    tableName := (tableName ifNil:['']) , aString

    "Created: / 06-01-2019 / 21:13:01 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:31:17 / Claus Gittinger"
! !

!TableData methodsFor:'analysis'!

addBincoSlidingMean3ForColumnNamed:colName
    "add a column with the sliding binco mean (1/4 + 1/2 + 1/4)
     of the column named colName.
     As binco:3 is quite common,
     this is a tuned version of addBincoSlidingMean:3 forColumnNamed:colName.

     Every row is replaced by a copy which is extended by one element.
     The first and the last row have a neighbour on one side only and
     take over the mean of their adjacent row.
     With fewer than 3 rows no mean can be computed at all; the new
     column is then left nil (instead of raising an index error).
     The new column is named 'Sliding_Binco_Mean_of_<colName>'.
     Answers the receiver.
     Raises an error (via #indexOfColumnNamed:) if there is no column named colName"

    |valColIdx lastIndex meanColIdx sourceRows|

    valColIdx := self indexOfColumnNamed:colName.
    "/ position of the appended element;
    "/ assumes that every row has exactly one element per column name
    meanColIdx := columnNames size + 1.
    sourceRows := rowData.
    lastIndex := sourceRows size.

    rowData := sourceRows collectWithIndex:[:row :idx |
        |mean|

        "/ only interior rows have both neighbours; mean remains nil otherwise
        ((idx > 1) and:[idx < lastIndex]) ifTrue:[
            mean := (
                        ((sourceRows at:idx-1) at:valColIdx)
                      + (((sourceRows at:idx) at:valColIdx) * 2)
                      + ((sourceRows at:idx+1) at:valColIdx)
                    ) / 4.
        ].
        row copyWith:mean
    ].
    "/ boundary rows copy the mean of their only neighbour
    (lastIndex >= 3) ifTrue:[
        (rowData at:1) at:meanColIdx put:((rowData at:2) at:meanColIdx).
        (rowData at:lastIndex) at:meanColIdx put:((rowData at:lastIndex-1) at:meanColIdx).
    ].
    columnNames := columnNames copyWith:('Sliding_Binco_Mean_of_',colName).
    ^ self
    
    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                addBincoSlidingMean3ForColumnNamed:'Mean_Per_Year_TXK')
                    tableName:'Feldberg/Schwarzwald';
                    plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Binco_Mean_of_Mean_Per_Year_TXK' .  }}.
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 08-01-2019 / 00:10:16 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:32:34 / Claus Gittinger"
!

addBincoSlidingMean:n forColumnNamed:colName
    "add a column with the sliding weighted mean over a window of n rows
     (n must be odd) of the column named colName.
     The center row has weight 1, the rows at distance d on either side
     have weight 1/(2^d); the weighted sum is divided by the sum of all weights
     (for n = 3 this gives 1/4 + 1/2 + 1/4).

     Every row is replaced by a copy which is extended by one element.
     The first and last n//2 rows have no complete window; they take over
     the first resp. last mean which could be computed.
     If the table has fewer than n rows, no mean can be computed and the
     new column is left nil (instead of raising an index error).
     The new column is named 'Sliding_Binco_Mean_of_<colName>'.
     Answers the receiver.
     Raises an error (via #indexOfColumnNamed:) if there is no column named colName"

    |nH valColIdx firstIndex lastIndex meanColIdx wAll sourceRows|

    self assert:(n odd).
    nH := n // 2.

    valColIdx := self indexOfColumnNamed:colName.
    "/ position of the appended element;
    "/ assumes that every row has exactly one element per column name
    meanColIdx := columnNames size + 1.
    sourceRows := rowData.

    "/ firstIndex .. lastIndex are the rows which have a complete window
    firstIndex := 1 + nH.
    lastIndex := sourceRows size - nH.

    "/ sum of all weights: both sides of the window plus 1 for the center
    wAll := 0.
    1 to:nH do:[:delta |
        wAll := wAll + (1 / (2 raisedTo:delta)).
        wAll := wAll + (1 / (2 raisedTo:delta)).
    ].
    wAll := wAll + 1.

    rowData := sourceRows collectWithIndex:[:row :idx |
        |slidingSum|

        ((idx >= firstIndex) and:[idx <= lastIndex]) ifTrue:[
            slidingSum := 0.
            "/ ... /8 /4 /2 /1 /2 /4 /8 ...
            1 to:nH do:[:delta |
                slidingSum := slidingSum + (((sourceRows at:idx-delta) at:valColIdx) / (2 raisedTo:delta)).
                slidingSum := slidingSum + (((sourceRows at:idx+delta) at:valColIdx) / (2 raisedTo:delta)).
            ].
            slidingSum := slidingSum + ((sourceRows at:idx) at:valColIdx).
            slidingSum := slidingSum / wAll.
        ].
        row copyWith:slidingSum
    ].

    "/ fill the boundary rows from the nearest computed mean.
    "/ The tail is filled from lastIndex (the last row with a complete window),
    "/ mirroring the head which is filled from firstIndex
    (firstIndex <= lastIndex) ifTrue:[
        1 to:nH do:[:delta |
            (rowData at:delta) at:meanColIdx put:((rowData at:firstIndex) at:meanColIdx).
            (rowData at:rowData size+1-delta) at:meanColIdx put:((rowData at:lastIndex) at:meanColIdx).
        ].
    ].
    columnNames := columnNames copyWith:('Sliding_Binco_Mean_of_',colName).
    ^ self
    
    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                addBincoSlidingMean:5 forColumnNamed:'Mean_Per_Year_TXK')
                    tableName:'Feldberg/Schwarzwald';
                    plot:{ #x -> 'MESS_DATUM' . 
                           #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Binco_Mean_of_Mean_Per_Year_TXK' .  }}.
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 07-01-2019 / 15:37:16 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:22:31 / Claus Gittinger"
!

addSlidingMean:n forColumnNamed:colName
    "add a column with the sliding mean over n values of the column named colName.
     The window is seeded with the values of the first n rows; then, for every
     row in turn, the oldest value is dropped from the window and the row's own
     value is appended, and the window's mean is stored with that row.
     Compared to the binco mean, this may introduce lag (phase shift).
     Every row is replaced by a copy which is extended by one element;
     the new column is named 'Sliding_Mean_of_<colName>'.
     Answers the receiver"

    |window windowSum valColIdx|

    valColIdx := self indexOfColumnNamed:colName.

    "/ seed the window with the first n values
    window := (1 to:n) collect:[:rowIdx | (rowData at:rowIdx) at:valColIdx] as:OrderedCollection.
    windowSum := window sum.

    rowData := rowData collect:[:eachRow |
        |oldest newest|

        oldest := window first.
        window removeFirst.
        newest := eachRow at:valColIdx.
        window add:newest.
        windowSum := windowSum - oldest + newest.
        eachRow copyWith:(windowSum / n)
    ].
    columnNames := columnNames copyWith:('Sliding_Mean_of_',colName).
    ^ self
    
    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerYearOfColumnNamed:'TXK')
                addSlidingMean:11 forColumnNamed:'Mean_Per_Year_TXK')
                    tableName:'Feldberg/Schwarzwald';
                    plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Mean_of_Mean_Per_Year_TXK' .  }}.
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 20:05:42 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:33:16 / Claus Gittinger"
!

extractRowsWhere:filterBlock
    "answer a new table of the receiver's class with the same column names,
     containing only those rows for which filterBlock evaluates to true.
     The table name is not carried over"

    ^ self class rows:(rowData select:filterBlock) columnNames:columnNames
    
    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractRowsWhere:[:row | (row at:1) startsWith:'1945'])
                    plot
        
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractRowsWhere:[:row | ((row at:1) from:5 to:6) = '01'])
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:8)
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:9)
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:10)
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            meanPerMonthOfColumnNamed:'TXK')
                extractMonth:10)
                    tableNameSuffix:'-Echterdingen';
                    plot

    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 09-01-2019 / 12:35:44 / Claus Gittinger"
! !

!TableData methodsFor:'analysis - DWD'!

dwd_extractMonth:monthIndex
    "answer a new table containing only the rows of the given month.
     The date is read from the 'MESS_DATUM' column, using the default
     date format of #dwd_extractMonth:columnName: ('%4y%2m%2d').
     This is specific to DWD data"

    ^ self dwd_extractMonth:monthIndex columnName:'MESS_DATUM'

    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot
        
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:2 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:8 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:9 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot

    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:00:52 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 18:30:09 / Claus Gittinger"
!

dwd_extractMonth:monthIndex columnName:monthColumnName
    "answer a new table containing only the rows of the given month.
     The date is read from the column named monthColumnName,
     using the date format '%4y%2m%2d'.
     This is specific to DWD data"

    |defaultFormat|

    defaultFormat := '%4y%2m%2d'.
    ^ self dwd_extractMonth:monthIndex columnName:monthColumnName format:defaultFormat
    
    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot
        
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:2 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:8 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:9 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot

    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 09-01-2019 / 14:42:05 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 18:31:31 / Claus Gittinger"
!

dwd_extractMonth:monthIndex columnName:monthColumnName format:dateFormat
    "answer a new table containing only the rows whose date falls into
     the month monthIndex. The date of a row is read from the column named
     monthColumnName, using dateFormat.
     The new table is named '<name of month> Mean'; the answer is the
     result of sending #tableName: to it.
     This is specific to DWD data"

    |dateColIdx monthTable|

    dateColIdx := self indexOfColumnNamed:monthColumnName.
    monthTable := self extractRowsWhere:[:eachRow |
        (Date readFrom:(eachRow at:dateColIdx) format:dateFormat) month = monthIndex
    ].
    ^ monthTable tableName:(Date nameOfMonth:monthIndex),' Mean'
    
    "
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot
        
     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:2 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:8 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:9 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot

     (((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                dwd_extractMonth:3 columnName:'MESS_DATUM' format:'%4y%2m')
                    tableNameSuffix:'-Echterdingen';
                    plot

     |table|
     table := (self new 
            readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt').
     1 to:12 do:[:month |        
         ((table
                dwd_meanPerMonthOfColumnNamed:'TXK')
                    dwd_extractMonth:month columnName:'MESS_DATUM' format:'%4y%2m')
                        tableNameSuffix:'-Echterdingen';
                        plot
     ]
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 09-01-2019 / 18:27:28 / Claus Gittinger"
!

dwd_extractMonth:month day:day
    "answer a new table containing only the rows for the given day in month.
     The date is read from the 'MESS_DATUM' column, using the
     date format '%4y%2m%2d'"

    |defaultFormat|

    defaultFormat := '%4y%2m%2d'.
    ^ self dwd_extractMonth:month day:day columnName:'MESS_DATUM' format:defaultFormat

    "
     ((self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        dwd_extractMonth:1 day:1)
            tableName:'1st Januar, Feldberg';
            plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:15:46 / Claus Gittinger"
    "Modified (comment): / 10-01-2019 / 16:20:48 / Claus Gittinger"
!

dwd_extractMonth:month day:day columnName:monthColumnName format:dateFormat
    "answer a new table (same column names) containing only the rows whose
     date matches both the given month and the given day of month.
     The date of a row is read from the column named monthColumnName,
     using dateFormat"

    |dateColIdx|

    dateColIdx := self indexOfColumnNamed:monthColumnName.
    ^ self extractRowsWhere:[:eachRow |
        |rowsDate|

        rowsDate := Date readFrom:(eachRow at:dateColIdx) format:dateFormat.
        (rowsDate month = month) and:[rowsDate day = day]
    ]

    "
     ((self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        dwd_extractMonth:1 day:1 columnName:'MESS_DATUM' format:'%4y%2m%2d')
            plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 10-01-2019 / 16:14:29 / Claus Gittinger"
!

dwd_meanPerMonthOfColumnNamed:colName
    "return a new table containing the arithmetic mean per month of the
     column named colName.
     The date is taken from the column 'MESS_DATUM' (or 'MESS_DATUM_BEGINN',
     if there is no such column); its first 6 characters (yyyymm) identify
     the month. Consecutive rows with the same yyyymm are averaged;
     values are parsed with Number readFrom:.
     Every row contributes to its month (including the first row of a month),
     and the last month is emitted as well.
     The new table has the columns 'MESS_DATUM' (holding yyyymm) and
     'Mean per month of <colName>' and is named 'Mean Per Month of <colName>'"

    |outRows valColIdx dateColIdx prevYearAndMonth sum countDaysPerMonth|

    dateColIdx := self indexOfColumnNamed:'MESS_DATUM' ifAbsent:[self indexOfColumnNamed:'MESS_DATUM_BEGINN'].
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    prevYearAndMonth := nil.
    sum := countDaysPerMonth := 0.
    rowData do:[:row |
        |rowsYearAndMonth|

        "/ yyyymmdd
        rowsYearAndMonth := (row at:dateColIdx) copyTo:6.
        (prevYearAndMonth notNil and:[rowsYearAndMonth ~= prevYearAndMonth]) ifTrue:[
            "/ month changed - emit the finished month and start over
            outRows add:{ prevYearAndMonth . (sum / countDaysPerMonth) }.
            sum := countDaysPerMonth := 0.
        ].
        prevYearAndMonth := rowsYearAndMonth.
        "/ the current row always counts for its own month
        sum := sum + (Number readFrom:(row at:valColIdx)).
        countDaysPerMonth := countDaysPerMonth + 1.
    ].
    "/ the loop only emits a month when the next one starts; emit the last one here
    (countDaysPerMonth > 0) ifTrue:[
        outRows add:{ prevYearAndMonth . (sum / countDaysPerMonth) }.
    ].
    ^ self class 
        rows:outRows columnNames:{ 'MESS_DATUM' . 'Mean per month of ',colName }
        tableName:('Mean Per Month of ',colName)
    
    "
     ((self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerMonthOfColumnNamed:'TXK')
                plot
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:06:36 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:24:34 / Claus Gittinger"
!

dwd_meanPerYearOfColumnNamed:colName
    "return a new table containing the arithmetic mean per year of the
     column named colName.
     The date is taken from the column 'MESS_DATUM' (or 'MESS_DATUM_BEGINN',
     if there is no such column); its first 4 characters identify the year.
     Consecutive rows with the same year are grouped; values are parsed
     with Number readFrom:.

     A year counts as complete if it contributed exactly 12 values.
     For an incomplete year, a placeholder row is added; when the following
     complete year is finished, the placeholder is replaced by the average of
     the placeholder and that year's mean (or by that year's mean alone,
     if the placeholder is nil).
     A value of -999 is replaced by the arithmetic mean of the previous and
     the next row's value (the row itself is used where there is no such neighbour).
     The final year is added if it is complete; an incomplete final year is
     left out, as there is no following year to fill it from.

     The new table has the columns 'MESS_DATUM' (holding the year) and
     'Mean_Per_Year_<colName>' and is named 'Mean Per Year of <colName>'.
     NOTE(review): the two #halt calls (two incomplete years in a row;
     jump of more than 20 between consecutive values) are kept from the
     original and look like debugging aids"

    |outRows valColIdx dateColIdx prevYear sum countValuesPerYear
     prevVal mustFillPrevious mean closeYear|

    dateColIdx := self indexOfColumnNamed:'MESS_DATUM' ifAbsent:[self indexOfColumnNamed:'MESS_DATUM_BEGINN'].
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    prevYear := nil.
    sum := countValuesPerYear := 0.
    prevVal := nil.
    mustFillPrevious := false.

    "/ emits the row for prevYear from the values accumulated so far
    closeYear := [
        countValuesPerYear = 12 ifFalse:[
            "/ ignore if incomplete - take mean of prevyear and next year
            mustFillPrevious ifTrue:[self halt:'should not happen'].
            mustFillPrevious := true.
            "/ placeholder: mean still holds the previous complete year's mean (or nil)
            outRows add:{ prevYear . mean }.
        ] ifTrue:[
            mean := sum / countValuesPerYear.
            mustFillPrevious ifTrue:[
                |t fillIn|

                t := (outRows last at:2).
                t isNil ifTrue:[
                    fillIn := mean
                ] ifFalse:[
                    fillIn := (t+mean)/2
                ].
                outRows last at:2 put:fillIn
            ].
            outRows add:{ prevYear . mean }.
            mustFillPrevious := false.
        ].
    ].

    rowData doWithIndex:[:row :rowIndex |
        |rowsYear val|

        "/ yyyymmdd
        rowsYear := (row at:dateColIdx) copyTo:4.
        prevYear isNil ifTrue:[
            prevYear := rowsYear.
        ].
        (rowsYear = prevYear) ifFalse:[
            "/ change of year
            closeYear value.
            sum := countValuesPerYear := 0.
            prevYear := rowsYear.
        ].
        val := Number readFrom:(row at:valColIdx).
        (val = -999) ifTrue:[
            |prevRow nextRow|

            "/ fill with arith. mean of previous value and next value
            prevRow := rowData at:rowIndex-1 ifAbsent:[row].
            nextRow := rowData at:rowIndex+1 ifAbsent:[row].
            val := ((Number readFrom:(prevRow at:valColIdx)) + (Number readFrom:(nextRow at:valColIdx))) / 2.
        ].
        sum := sum + val.
        countValuesPerYear := countValuesPerYear + 1.

        prevVal notNil ifTrue:[
            "/ outlier?
            (prevVal - val) abs > 20 ifTrue:[self halt].
        ].
        prevVal := val.
    ].
    "/ the loop only emits a year when the next one starts;
    "/ emit the final year here, provided it is complete
    (countValuesPerYear = 12) ifTrue:[
        closeYear value.
    ].
    ^ self class 
        rows:outRows columnNames:{ 'MESS_DATUM' . 'Mean_Per_Year_',colName }
        tableName:('Mean Per Year of ',colName)
    
    "
     ((self new 
        read:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
            dwd_meanPerYearOfColumnNamed:'TXK')
                plot
        
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 19:40:34 / Claus Gittinger"
    "Modified: / 06-01-2019 / 22:34:06 / Claus Gittinger"
! !

!TableData methodsFor:'analysis - masie'!

masie_meanPerMonthOfColumnNamed:colName
    "return a new table containing the arithmetic mean per month of the
     column named colName of masie data.
     The date is taken from the column 'yyyyddd' (4 digits year followed by
     the day within the year) and converted to a year+month key (yyyymm).
     Consecutive rows with the same key are averaged; values are parsed
     with Number readFrom:.
     Every row contributes to its month (including the first row of a month),
     every emitted row is labelled with its own yyyymm key,
     and the last month is emitted as well.
     The new table has the columns 'YearAndMonth' and
     'Mean per month of <colName>' and is named 'Mean Per Month of <colName>'"

    |outRows valColIdx dateColIdx prevYearAndMonth sum countDaysPerMonth|

    dateColIdx := self indexOfColumnNamed:'yyyyddd'.
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    prevYearAndMonth := nil.
    sum := countDaysPerMonth := 0.
    rowData do:[:row |
        |rowsDate rowsYear rowsDay date yearAndMonth|

        rowsDate := row at:dateColIdx.
        "/ yyyyddd
        rowsYear := Integer readFrom:(rowsDate copyTo:4).
        rowsDay := Integer readFrom:(rowsDate copyFrom:5).
        date := Date newDay:rowsDay year:rowsYear.
        yearAndMonth := '%4d%02d' printfWith:date year with:date month.

        (prevYearAndMonth notNil and:[yearAndMonth ~= prevYearAndMonth]) ifTrue:[
            "/ month changed - emit the finished month and start over
            outRows add:{ prevYearAndMonth . (sum / countDaysPerMonth) }.
            sum := countDaysPerMonth := 0.
        ].
        prevYearAndMonth := yearAndMonth.
        "/ the current row always counts for its own month
        sum := sum + (Number readFrom:(row at:valColIdx)).
        countDaysPerMonth := countDaysPerMonth + 1.
    ].
    "/ the loop only emits a month when the next one starts; emit the last one here
    (countDaysPerMonth > 0) ifTrue:[
        outRows add:{ prevYearAndMonth . (sum / countDaysPerMonth) }.
    ].
    ^ self class 
        rows:outRows columnNames:{ 'YearAndMonth' . 'Mean per month of ',colName }
        tableName:('Mean Per Month of ',colName)
    
    "
     ((self new 
        readCSV:'/Users/exept/Downloads/masie_4km_allyears_extent_sqkm.csv' separator:$, skip:1)
            masie_meanPerMonthOfColumnNamed:'(0) Northern_Hemisphere')
                plot
        
    "

    "Created: / 06-01-2019 / 19:06:36 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:24:34 / Claus Gittinger"
! !

!TableData methodsFor:'helpers'!

indexOfColumnNamed:name
    "answer the index of the column with the given name;
     report an error if I have no column with that name"

    ^ self 
        indexOfColumnNamed:name 
        ifAbsent:[self error:'no such column'].

    "
     self new 
        read:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        indexOfColumnNamed:'MESS_DATUM'.
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:20:47 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:33:23 / Claus Gittinger"
!

indexOfColumnNamed:name ifAbsent:exceptionValue
    "answer the index of the column with the given name;
     if I have no such column, answer the value of exceptionValue instead"

    |idx|

    idx := columnNames indexOf:name.
    idx == 0 ifTrue:[^ exceptionValue value].
    ^ idx

    "
     self new 
        read:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        indexOfColumnNamed:'MESS_DATUM' ifAbsent:[0].
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:20:47 / Claus Gittinger"
    "Modified (comment): / 30-05-2019 / 11:33:23 / Claus Gittinger"
! !

!TableData methodsFor:'instance creation'!

rows:rowDataArg columnNames:columnNamesArg
    "set my rows and my column names;
     tableName and columnTypes are left as they are"

    columnNames := columnNamesArg.
    rowData := rowDataArg.

    "Created: / 06-01-2019 / 13:26:01 / Claus Gittinger"
!

rows:rowDataArg columnNames:columnNamesArg tableName:tableNameArg
    "set my rows, my column names and my table name;
     columnTypes are left as they are"

    tableName := tableNameArg.
    columnNames := columnNamesArg.
    rowData := rowDataArg.

    "Created: / 06-01-2019 / 19:25:10 / Claus Gittinger"
! !

!TableData methodsFor:'plotting'!

plot
    "plot all of my data, without a plot specification (see plot:)"

    self plot:nil.

    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }} .
    "

    "Created: / 06-01-2019 / 18:47:27 / Claus Gittinger"
    "Modified: / 06-01-2019 / 21:32:22 / Claus Gittinger"
!

plot:optionalSpec
    "write my data as CSV into a temporary file, plot that file
     (see plot:file:) and remove the temporary file afterwards,
     even if writing or plotting failed.
     optionalSpec is passed unchanged to plot:file: and may be nil."

    |tmpFile|

    [
        tmpFile := Filename newTemporary.
        tmpFile writingFileDo:[:s |
            self writeCSVToStream:s.
        ].
        "/ FileBrowserV2 openOn:tmpFile.
        self plot:optionalSpec file:tmpFile.
    ] ensure:[
        "/ tmpFile is still nil if newTemporary itself failed, and the file
        "/ may not exist if it could not be written; in both cases a blind
        "/ delete would raise a second error, hiding the original one
        (tmpFile notNil and:[tmpFile exists]) ifTrue:[
            tmpFile delete
        ].
    ].

    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }} .
    "

    "Created: / 06-01-2019 / 21:32:09 / Claus Gittinger"
    "Modified: / 09-01-2019 / 18:40:16 / Claus Gittinger"
!

plot:optionalSpec file:fileName
    "plot the CSV data found in fileName: generate an R script, run it with
     the 'r' command and open an inspector on the resulting PNG image.

     optionalSpec is either nil (the whole table is plotted via R's plot(Data)),
     or a Dictionary (or a collection of associations) with the keys:
        #x -> name of the column used for the x axis
        #y -> collection of column names; the first one is drawn via plot(),
              any further ones are added via lines()

     The image is always written to '/tmp/plot3.png', with a fixed size of 1200x800.
     The temporary script file is removed after the command has been run.
     Raises an error (showing the command's stderr output) if the command fails."

    |script scriptFile outFile width height spec xCol yCols cmd err|

    outFile := '/tmp/plot3.png'.
    "/ start with an empty output file
    outFile asFilename contents:''.
    
    width := 1200.
    height := 800.

    optionalSpec notNil ifTrue:[
        spec := optionalSpec isDictionary 
                    ifTrue:[optionalSpec] 
                    ifFalse:[Dictionary withAssociations:optionalSpec].
        
        yCols := spec at:#y.
        xCol := spec at:#x.
        "/ the column names are spliced into the template first;
        "/ the %%5 of the inner bindWith: becomes %5 for the outer one
        script := '
png(filename="%1", height=%3, width=%2)
Data <- read.csv(file="%4", header=TRUE, sep=";")
',xCol,' <- Data$',xCol,'
',((yCols collect:[:col | col,' <- Data$',col]) asStringWith:Character cr),
"/'plot(',xCol,', ' 
"/       ,((yCols collect:[:col | col,', ']) asStringWith:''),
"/' type="l", main = "%5")'
'
',('plot(%1, %2, type="b", col=2, main = "%%5")' bindWith:xCol with:yCols first),'
',((yCols from:2 collect:[:ycol | 'lines(%1, %2, type="l", col=3)' bindWith:xCol with:ycol]) asStringWith:Character cr)
,'
# dev.off()
'
            bindWith:outFile 
            with:width 
            with:height
            with:fileName asFilename pathName
            with:(tableName ? '').
        "/ self halt.
    ] ifFalse:[        
        script := '
# library(ggplot2)
# library(ggfortify)

png(filename="%1", height=%3, width=%2)
Data <- read.csv(file="%4", header=TRUE, sep=";")

# theme_set(theme_classic())

# Plot 
# autoplot(Data) + 
#   labs(title="Data") + 
#   theme(plot.title = element_text(hjust=0.1))
plot(Data, type="l", main = "%5")
# plot(Data, cex = .4)
dev.off()
'
            bindWith:outFile 
            with:width 
            with:height
            with:fileName asFilename pathName
            with:(tableName ? '').
    ].
    
    scriptFile := Filename newTemporary.
    [
        scriptFile contents:script.        

        err := WriteStream on:String new.
        cmd := 'r --silent --vanilla --slave -f %1' bindWith:scriptFile.
        (OSProcess new 
            command:cmd;
            errorStream:err;
            execute
        ) ifFalse:[
            self error:('cannot execute err: %1' bindWith: err contents)
        ].
    ] ensure:[
        "/ the script is only needed while the command runs;
        "/ do not leave it behind in the temp directory
        scriptFile exists ifTrue:[scriptFile delete].
    ].

    "/ self halt.
    "/ outFile asFilename exists.
    ((Smalltalk classNamed:#Image) fromFile:outFile asFilename) inspect.
    
    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK').
     tmax tableName:'Feldberg'.
     tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}.
    "
    "
     |data tmax|

     data := self fromFile:'/Users/exept/Desktop/klima/monatswerte_KL_01050_19240101_20181231_hist/produkt_klima_monat_19240101_20181231_01050.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM_BEGINN' 'MO_TT').
     tmax tableName:'Feldberg'.
     tmax plot:{ #x -> 'MESS_DATUM_BEGINN' . #y -> { 'MO_TT'  }}.
    "

    "Created: / 06-01-2019 / 21:32:43 / Claus Gittinger"
    "Modified: / 30-05-2019 / 11:28:42 / Claus Gittinger"
!

plotFile:fileName
    "plot the CSV data found in fileName, without a plot specification
     (see plot:file:)"

    self plot:nil file:fileName.

    "
     |data tmax|
     
     data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK').
     tmax tableName:'Test'.
     tmax plot.
    "

    "Created: / 06-01-2019 / 15:59:47 / Claus Gittinger"
    "Modified: / 06-01-2019 / 21:32:59 / Claus Gittinger"
! !

!TableData methodsFor:'processing'!

removeColumn:index
    "destructively remove the column at index:
     both my column names and every row are replaced by copies
     which lack the element at that index"

    columnNames := columnNames copyWithoutIndex:index.
    rowData := rowData collect:[:eachRow | 
                    eachRow copyWithoutIndex:index
               ].

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        removeColumn:1;
        inspect.
    "

    "Created: / 06-01-2019 / 13:05:41 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:50 / Claus Gittinger"
!

removeColumnNamed:name
    "destructively remove the column with the given name;
     reports an error (via indexOfColumnNamed:) if there is no such column"

    |colIdx|

    colIdx := self indexOfColumnNamed:name.
    self removeColumn:colIdx

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        removeColumnNamed:'STATIONS_ID';
        removeColumnNamed:'QN_3';
        removeColumnNamed:'FX';
        removeColumnNamed:'FM';
        removeColumnNamed:'RSK';
        removeColumnNamed:'RSKF';
        removeColumnNamed:'SDK';
        removeColumnNamed:'SHK_TAG';
        removeColumnNamed:'NM';
        removeColumnNamed:'VPM';
        removeColumnNamed:'PM';
        removeColumnNamed:'TMK';
        removeColumnNamed:'UPM';
        removeColumnNamed:'eor';
        inspect.
    "

    "/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')

    "Created: / 06-01-2019 / 13:06:50 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:53 / Claus Gittinger"
!

withColumns:indexCollection
    "return a new TableData instance, containing only the columns
     at the given indices, in the order given by indexCollection.
     Only rows and column names are passed to the new instance."

    |pick selectedNames selectedRows|

    "/ extracts the wanted elements from any indexable sequence, as an Array
    pick := [:seq | indexCollection collect:[:i | seq at:i] as:Array].

    selectedNames := pick value:columnNames.
    selectedRows := rowData collect:[:eachRow | pick value:eachRow].
    ^ self class rows:selectedRows columnNames:selectedNames.

    "
     (self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        withColumns:#(1 2 3).
    "

    "Created: / 06-01-2019 / 14:35:11 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:57 / Claus Gittinger"
!

withColumnsNamed:nameCollection
    "return a new TableData instance, containing only the columns with
     the given names; reports an error (via indexOfColumnNamed:)
     for a name which is not one of my column names"

    |indices|

    indices := nameCollection collect:[:eachName | self indexOfColumnNamed:eachName].
    ^ self withColumns:indices

    "
     (self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        withColumnsNamed:#('MESS_DATUM' 'TXK').
    "

    "Created: / 06-01-2019 / 14:36:16 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:13:45 / Claus Gittinger"
!

withoutColumn:index
    "return a new TableData instance, without the column at the given index;
     the receiver is not modified.
     Only rows and column names are passed to the new instance."

    |shorterRows shorterNames|

    shorterRows := rowData collect:[:eachRow | eachRow copyWithoutIndex:index].
    shorterNames := columnNames copyWithoutIndex:index.
    ^ self class rows:shorterRows columnNames:shorterNames.

    "
     self new 
        readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        withoutColumn:1.
    "

    "Created: / 06-01-2019 / 13:27:11 / Claus Gittinger"
    "Modified (comment): / 10-01-2019 / 16:25:56 / Claus Gittinger"
!

withoutColumns:indexCollection
    "return a new TableData instance, without the columns at the given indices;
     the remaining columns keep their relative order.
     Only rows and column names are passed to the new instance."

    |keptIndices pick keptNames keptRows|

    "/ start with all column indices, then take the unwanted ones out
    keptIndices := (1 to:columnNames size) asNewOrderedCollection.
    keptIndices removeAll:indexCollection.

    "/ extracts the kept elements from any indexable sequence, as an Array
    pick := [:seq | keptIndices collect:[:i | seq at:i] as:Array].

    keptNames := pick value:columnNames.
    keptRows := rowData collect:[:eachRow | pick value:eachRow].
    ^ self class rows:keptRows columnNames:keptNames.

    "
     (self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
        withoutColumns:#(1 2 3).
    "

    "Created: / 06-01-2019 / 14:32:08 / Claus Gittinger"
    "Modified (comment): / 09-01-2019 / 12:14:29 / Claus Gittinger"
! !

!TableData methodsFor:'reading'!

readCSV:filename
    "read CSV data from the named file, using a semicolon as field separator
     and without skipping any leading lines"

    ^ self readCSV:filename skip:0

    "
     self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        inspect.
    "

    "
     self new 
        readCSV:'/Users/exept/Downloads/masie_4km_allyears_extent_sqkm.csv';
        inspect.
    "
    "Created: / 09-01-2019 / 12:07:27 / Claus Gittinger"
!

readCSV:filename separator:separatorCharacter
    "read CSV data from the named file, with fields separated by
     separatorCharacter and without skipping any leading lines"

    ^ self 
        readCSV:filename 
        separator:separatorCharacter 
        skip:0

    "
     self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt' separator:$;;
        inspect.
    "

    "Created: / 09-01-2019 / 12:11:11 / Claus Gittinger"
!

readCSV:filename separator:separatorCharacter skip:numLinesToSkip
    "read CSV data from the named file into the receiver,
     replacing my column names and my rows.

     The first numLinesToSkip lines are ignored; the next line is taken
     as header line and provides the column names. Every further line provides
     one row, stored as an Array of strings (withoutSeparators applied to each field).
     Lines are split at separatorCharacter; quoted fields are not recognized.
     If the file has no header line (it is empty, or nothing follows the
     skipped lines), I end up as an empty table without any columns."

    |headLine numCols dataLine ls word row rows|

    rows := OrderedCollection new.
    filename asFilename readingFileDo:[:s |
        numLinesToSkip timesRepeat:[s nextLine].

        headLine := s nextLine.
        headLine isNil ifTrue:[
            "/ nothing left to read: no header, no columns, no rows
            columnNames := #().
        ] ifFalse:[
            "/ numCols only serves as initial capacity for the row collections
            numCols := (headLine occurrencesOf:separatorCharacter)+1.
            columnNames := (headLine splitBy:separatorCharacter) collect:#withoutSeparators.
            [s atEnd] whileFalse:[
                dataLine := s nextLine.
                ls := dataLine readStream.
                row := OrderedCollection new:numCols.
                [ls atEnd] whileFalse:[
                    ls skipSeparators.
                    word := (ls upTo:separatorCharacter) withoutSeparators.
                    row add:word.
                ].
                rows add:(row asArray).
            ].
        ].
    ].
    rowData := rows.

    "
     self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        inspect.
    "

    "Created: / 09-01-2019 / 12:11:11 / Claus Gittinger"
!

readCSV:filename skip:numLinesToSkip
    "read CSV data from the named file, using a semicolon as field separator
     and ignoring the first numLinesToSkip lines"

    ^ self 
        readCSV:filename 
        separator:$; 
        skip:numLinesToSkip

    "
     self new 
        readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
        inspect.
    "

    "
     self new 
        readCSV:'/Users/exept/Downloads/masie_4km_allyears_extent_sqkm.csv' skip:1;
        inspect.
    "
    "Created: / 09-01-2019 / 12:07:27 / Claus Gittinger"
!

readDemoCSV
    "read a demo data set from a fixed location;
     the path below is hard-coded, so this only works where that file exists"

    |demoFile|

    demoFile := '/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
    self readCSV:demoFile.

    "Created: / 09-01-2019 / 12:07:52 / Claus Gittinger"
! !

!TableData methodsFor:'writing'!

writeCSVToStream:aStream
    "save myself as CSV onto a stream:
     one line with the column names, followed by one line per row.
     Fields are separated by semicolons; a field which itself contains
     a semicolon is enclosed in double quotes.
     Column names are written as they are, cells via their printString.
     NOTE(review): the printString of a String cell includes its quote
     characters - confirm that this is what readers of the file expect."

    |putField|

    "/ writes a single field, quoted if it contains the separator
    putField := [:text |
        (text includes:$;) ifTrue:[
            aStream nextPut:$"; nextPutAll:text; nextPut:$".
        ] ifFalse:[
            aStream nextPutAll:text
        ]
    ].

    columnNames
        do:[:eachName | putField value:eachName]
        separatedBy:[aStream nextPut:$;].
    aStream cr.

    rowData do:[:eachRow |
        eachRow
            do:[:eachCell | putField value:eachCell printString]
            separatedBy:[aStream nextPut:$;].
        aStream cr.
    ].

    "
     |data tmax|
     
     data := self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
     tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
     String streamContents:[:s | tmax writeCSVToStream:s]
    "

    "Created: / 09-01-2019 / 17:48:25 / Claus Gittinger"
! !

!TableData class methodsFor:'documentation'!

version_CVS
    "answer the version-control header string of this file
     (presumably a keyword placeholder filled in by the source code manager - confirm)"

    ^ '$Header$'
! !