"{ Encoding: utf8 }"
"{ Package: 'stx:libbasic2' }"
"{ NameSpace: Smalltalk }"
"TableData keeps a table as a collection of rows (rowData) together with one name per column (columnNames)
 and an optional tableName. columnTypes is declared, but is not assigned by any method visible in this file."
Object subclass:#TableData
instanceVariableNames:'rowData columnNames columnTypes tableName'
classVariableNames:''
poolDictionaries:''
category:'Collections-Sequenceable-Table'
!
!TableData class methodsFor:'documentation'!
documentation
"
    Work in progress (unfinished).

    Supports a few algorithms on tabular data, such as the contents of CSV files:
      - snippets to read and process tabular (CSV) data
      - snippets to generate a plot.

    The method category 'analysis - DWD' (DWD = Deutscher Wetterdienst, the German weather service)
    contains code which is specific to historic weather data.

    [author:]
        Claus Gittinger

    [instance variables:]
        rowData         the actual data (a collection of rows)
        columnNames     name per column
        columnTypes     type per column (default is String)
        tableName       name - only used for labeling graphs

    [class variables:]

    [see also:]
"
! !
!TableData class methodsFor:'instance creation'!
fromFile:filename
    "answer a new table, filled from the semicolon separated CSV file named filename"

    |newTable|

    newTable := self new.
    ^ newTable readCSV:filename
"
self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'
"
"Created: / 06-01-2019 / 13:28:53 / Claus Gittinger"
"Modified: / 09-01-2019 / 12:08:02 / Claus Gittinger"
!
rows:rowData columnNames:names
    "answer a new table which holds the given rows and column names"

    |newTable|

    newTable := self new.
    ^ newTable rows:rowData columnNames:names
"Created: / 06-01-2019 / 13:25:37 / Claus Gittinger"
!
rows:rowData columnNames:names tableName:tableName
    "answer a new table which holds the given rows, column names and table name"

    |newTable|

    newTable := self new.
    ^ newTable rows:rowData columnNames:names tableName:tableName
"Created: / 06-01-2019 / 19:24:47 / Claus Gittinger"
! !
!TableData methodsFor:'accessing'!
getColumn:index
    "answer the values found at column position index, one element per row"

    |columnValues|

    columnValues := rowData collect:[:eachRow | eachRow at:index].
    ^ columnValues
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
getColumn:1.
"
"Created: / 06-01-2019 / 19:04:08 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:29:54 / Claus Gittinger"
!
getColumnNamed:name
    "answer the values of the column called name, one element per row.
     Raises an error (via indexOfColumnNamed:) if there is no such column"

    |colIdx|

    colIdx := self indexOfColumnNamed:name.
    ^ self getColumn:colIdx
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
getColumnNamed:'TXK'.
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 19:04:24 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:30:00 / Claus Gittinger"
!
getColumns:indexCollection
    "answer one Array per row; each of them holds that row's values at the column
     positions listed in indexCollection, in the listed order.
     (i.e. the result is organized row by row, not column by column)"

    ^ rowData collect:[:eachRow |
        indexCollection collect:[:colIdx | eachRow at:colIdx] as:Array
    ]
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
getColumns:#(1 2 3).
"
"Created: / 06-01-2019 / 19:04:47 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:30:30 / Claus Gittinger"
!
getColumnsNamed:names
    "answer one Array per row; each of them holds that row's values of the columns
     called names, in the listed order"

    ^ self getColumns:(names collect:[:eachName | self indexOfColumnNamed:eachName])
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
getColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 19:04:56 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:30:41 / Claus Gittinger"
!
tableName:aString
"set the table's name.
 Within this file, the name is only read when a plot is generated (as the plot's title)"
tableName := aString.
"Modified (comment): / 30-05-2019 / 11:30:52 / Claus Gittinger"
!
tableNamePrefix:aString
    "prepend a prefix to the table's name.
     A table which has no name yet (tableName is nil) is treated as having an empty name"

    tableName := aString , (tableName ? '').
"Created: / 06-01-2019 / 21:13:12 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:31:08 / Claus Gittinger"
!
tableNameSuffix:aString
    "append a suffix to the table's name.
     A table which has no name yet (tableName is nil) is treated as having an empty name"

    tableName := (tableName ? '') , aString.
"Created: / 06-01-2019 / 21:13:01 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:31:17 / Claus Gittinger"
! !
!TableData methodsFor:'analysis'!
addBincoSlidingMean3ForColumnNamed:colName
    "destructively add a column named 'Sliding_Binco_Mean_of_<colName>' which holds
     the sliding binco mean (1/4 + 1/2 + 1/4) of the column called colName.
     As binco:3 is quite common, this is a tuned version of
     addBincoSlidingMean:3 forColumnNamed:colName.

     The first and the last row have no neighbour on one side; they take over the mean
     of the adjacent row. A table with fewer than 3 rows has no inner row at all;
     there, each row's own value is used as its mean.
     The values of the column must be numbers. Answers the receiver"

    |valColIdx meanColIdx oldRows lastIndex|

    valColIdx := self indexOfColumnNamed:colName.
    "the new column is appended behind the existing ones"
    meanColIdx := columnNames size + 1.
    oldRows := rowData.
    lastIndex := oldRows size.

    rowData := oldRows collectWithIndex:[:row :idx |
        |mean|

        ((idx > 1) and:[idx < lastIndex]) ifTrue:[
            mean := (((oldRows at:idx-1) at:valColIdx)
                     + ((row at:valColIdx) * 2)
                     + ((oldRows at:idx+1) at:valColIdx)) / 4.
        ] ifFalse:[
            "edge row; patched below, unless the table is too short to have inner rows"
            lastIndex < 3 ifTrue:[
                mean := row at:valColIdx.
            ].
        ].
        row copyWith:mean.
    ].

    lastIndex >= 3 ifTrue:[
        (rowData at:1) at:meanColIdx put:((rowData at:2) at:meanColIdx).
        (rowData at:lastIndex) at:meanColIdx put:((rowData at:lastIndex-1) at:meanColIdx).
    ].
    columnNames := columnNames copyWith:('Sliding_Binco_Mean_of_',colName).
    ^ self
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerYearOfColumnNamed:'TXK')
addBincoSlidingMean3ForColumnNamed:'Mean_Per_Year_TXK')
tableName:'Feldberg/Schwarzwald';
plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Binco_Mean_of_Mean_Per_Year_TXK' . }}.
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 08-01-2019 / 00:10:16 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:32:34 / Claus Gittinger"
!
addBincoSlidingMean:n forColumnNamed:colName
    "destructively add a column named 'Sliding_Binco_Mean_of_<colName>' which holds
     the sliding binco mean over a window of n rows (n must be odd).
     The center value is weighted with 1, the two neighbours at distance delta
     with 1/2^delta each; the weighted sum is divided by the sum of all weights
     (for n=3, this gives 1/4 + 1/2 + 1/4).

     The first and the last n//2 rows have no complete window; they take over the mean
     of the nearest row which has one. The table should therefore hold at least n rows.
     The values of the column must be numbers. Answers the receiver"

    |nH valColIdx firstIndex lastIndex meanColIdx wAll oldRows|

    self assert:(n odd).
    nH := n // 2.
    valColIdx := self indexOfColumnNamed:colName.
    oldRows := rowData.
    "first and last row for which a complete window is available"
    firstIndex := 1+nH. lastIndex := oldRows size-nH.

    "sum of all weights"
    wAll := 0.
    1 to:nH do:[:delta |
        wAll := wAll + (1 / (2 raisedTo:delta)).
        wAll := wAll + (1 / (2 raisedTo:delta)).
    ].
    wAll := wAll + 1.

    rowData := oldRows collectWithIndex:[:row :idx |
        |slidingSum|

        ((idx >= firstIndex) and:[idx <= lastIndex]) ifTrue:[
            slidingSum := 0.
            "/ ... /8 /4 /2 /1 /2 /4 /8 ...
            1 to:nH do:[:delta |
                slidingSum := slidingSum + (((oldRows at:idx-delta) at:valColIdx) / (2 raisedTo:delta)).
                slidingSum := slidingSum + (((oldRows at:idx+delta) at:valColIdx) / (2 raisedTo:delta)).
            ].
            slidingSum := slidingSum + (row at:valColIdx).
            slidingSum := slidingSum / wAll.
        ].
        row copyWith:slidingSum.
    ].

    "fill the edges from the nearest fully computed row.
     The trailing edge uses lastIndex (it used to read one row too far inside,
     which disagreed with addBincoSlidingMean3ForColumnNamed:)"
    meanColIdx := columnNames size+1.
    1 to:nH do:[:delta |
        (rowData at:delta) at:meanColIdx put:((rowData at:firstIndex) at:meanColIdx)
    ].
    1 to:nH do:[:delta |
        (rowData at:rowData size+1-delta) at:meanColIdx put:((rowData at:lastIndex) at:meanColIdx)
    ].
    columnNames := columnNames copyWith:('Sliding_Binco_Mean_of_',colName).
    ^ self
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerYearOfColumnNamed:'TXK')
addBincoSlidingMean:5 forColumnNamed:'Mean_Per_Year_TXK')
tableName:'Feldberg/Schwarzwald';
plot:{ #x -> 'MESS_DATUM' .
#y -> { 'Mean_Per_Year_TXK' . 'Sliding_Binco_Mean_of_Mean_Per_Year_TXK' . }}.
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 07-01-2019 / 15:37:16 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:22:31 / Claus Gittinger"
!
addSlidingMean:n forColumnNamed:colName
    "destructively add a column named 'Sliding_Mean_of_<colName>' which holds
     the sliding (trailing) arithmetic mean over n rows of the column called colName.
     Each of the first n rows gets the mean of the first n values; every later row
     gets the mean of its own value and the n-1 values before it.
     The sliding mean is simple, but may introduce lag (phase shift), which binco avoids.
     The table must hold at least n rows, and the values must be numbers.
     Answers the receiver"

    |window windowSum valColIdx|

    valColIdx := self indexOfColumnNamed:colName.

    "the window starts out with the first n values"
    window := OrderedCollection new.
    1 to:n do:[:rowIdx |
        window add:((rowData at:rowIdx) at:valColIdx)
    ].
    windowSum := window sum.

    rowData := rowData collect:[:row |
        |dropped added|

        "slide by one: the oldest value leaves the window, the row's value enters it"
        dropped := window removeFirst.
        added := row at:valColIdx.
        window add:added.
        windowSum := windowSum - dropped + added.
        row copyWith:(windowSum / n).
    ].
    columnNames := columnNames copyWith:('Sliding_Mean_of_',colName).
    ^ self
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerYearOfColumnNamed:'TXK')
addSlidingMean:11 forColumnNamed:'Mean_Per_Year_TXK')
tableName:'Feldberg/Schwarzwald';
plot:{ #x -> 'MESS_DATUM' . #y -> { 'Mean_Per_Year_TXK' . 'Sliding_Mean_of_Mean_Per_Year_TXK' . }}.
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 20:05:42 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:33:16 / Claus Gittinger"
!
extractRowsWhere:filterBlock
    "answer a new table with the receiver's column names, which contains
     only those rows for which filterBlock evaluates to true.
     The new table is created without a table name"

    ^ self class
        rows:(rowData select:filterBlock)
        columnNames:columnNames
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractRowsWhere:[:row | (row at:1) startsWith:'1945'])
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractRowsWhere:[:row | ((row at:1) from:5 to:6) = '01'])
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:8)
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:9)
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10)
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10)
tableNameSuffix:'-Echterdingen';
plot
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 09-01-2019 / 12:35:44 / Claus Gittinger"
! !
!TableData methodsFor:'analysis - DWD'!
extractMonth:monthIndex
    "answer a new table which contains only the rows of the given month.
     This is specific to DWD data: the date is taken from the column named
     'MESS_DATUM' and parsed with the format '%4y%2m%2d'"

    |dateColumnName dwdDateFormat|

    dateColumnName := 'MESS_DATUM'.
    dwdDateFormat := '%4y%2m%2d'.
    ^ self extractMonth:monthIndex columnName:dateColumnName format:dwdDateFormat
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:2 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:8 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:9 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
tableNameSuffix:'-Echterdingen';
plot
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 19:00:52 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 18:30:09 / Claus Gittinger"
!
extractMonth:monthIndex columnName:monthColumnName
    "answer a new table which contains only the rows of the given month.
     The date is taken from the column called monthColumnName and parsed with
     the DWD date format '%4y%2m%2d'"

    |dwdDateFormat|

    dwdDateFormat := '%4y%2m%2d'.
    ^ self extractMonth:monthIndex columnName:monthColumnName format:dwdDateFormat
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:2 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:8 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:9 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
tableNameSuffix:'-Echterdingen';
plot
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 09-01-2019 / 14:42:05 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 18:31:31 / Claus Gittinger"
!
extractMonth:monthIndex columnName:monthColumnName format:dateFormat
    "answer a new table which contains only the rows of the given month.
     The date is taken from the column called monthColumnName and parsed with dateFormat.
     The new table is named '<name of the month> Mean'"

    |dateColIdx monthRows|

    dateColIdx := self indexOfColumnNamed:monthColumnName.
    monthRows := self extractRowsWhere:[:row |
        (Date readFrom:(row at:dateColIdx) format:dateFormat) month = monthIndex
    ].
    ^ monthRows tableName:(Date nameOfMonth:monthIndex),' Mean'
"
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:1 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:2 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:8 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:9 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:10 columnName:'MESS_DATUM' format:'%4y%2m')
tableNameSuffix:'-Echterdingen';
plot
(((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt')
meanPerMonthOfColumnNamed:'TXK')
extractMonth:3 columnName:'MESS_DATUM' format:'%4y%2m')
tableNameSuffix:'-Echterdingen';
plot
|table|
table := (self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19530101_20171231_04931.txt').
1 to:12 do:[:month |
((table
meanPerMonthOfColumnNamed:'TXK')
extractMonth:month columnName:'MESS_DATUM' format:'%4y%2m')
tableNameSuffix:'-Echterdingen';
plot
]
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 09-01-2019 / 18:27:28 / Claus Gittinger"
!
extractMonth:month day:day
    "answer a new table which contains only the rows of the given day in the given month.
     The date is taken from the column named 'MESS_DATUM' and parsed with
     the format '%4y%2m%2d'"

    |dateColumnName dwdDateFormat|

    dateColumnName := 'MESS_DATUM'.
    dwdDateFormat := '%4y%2m%2d'.
    ^ self extractMonth:month day:day columnName:dateColumnName format:dwdDateFormat
"
((self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
extractMonth:1 day:1)
tableName:'1st Januar, Feldberg';
plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 13:15:46 / Claus Gittinger"
"Modified (comment): / 10-01-2019 / 16:20:48 / Claus Gittinger"
!
extractMonth:month day:day columnName:monthColumnName format:dateFormat
    "answer a new table which contains only the rows of the given day in the given month.
     The date is taken from the column called monthColumnName and parsed with dateFormat"

    |dateColIdx|

    dateColIdx := self indexOfColumnNamed:monthColumnName.
    ^ self extractRowsWhere:[:row |
        |rowsDate|

        rowsDate := Date readFrom:(row at:dateColIdx) format:dateFormat.
        (rowsDate month = month) and:[rowsDate day = day]
    ]
"
((self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
extractMonth:1 day:1 columnName:'MESS_DATUM' format:'%4y%2m%2d')
plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 10-01-2019 / 16:14:29 / Claus Gittinger"
!
meanPerMonthOfColumnNamed:colName
    "answer a new table which contains the arithmetic mean per month of the column
     called colName. The result has two columns: 'MESS_DATUM' (the first 6 characters
     of the date, i.e. yyyymm for yyyymmdd dates) and 'Mean per month of <colName>'.

     Consecutive rows with the same yyyymm prefix in the 'MESS_DATUM' column form
     one group, so the rows are expected to be ordered by date.
     Every row contributes to the mean of its group (including the first row of a
     month), and the last group is emitted as well.
     The values are converted with Number readFrom:, i.e. they are expected to be strings"

    |outRows valColIdx dateColIdx prevYearAndMonth sum countDaysPerMonth|

    dateColIdx := self indexOfColumnNamed:'MESS_DATUM'.
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    prevYearAndMonth := nil.
    sum := countDaysPerMonth := 0.
    rowData do:[:row |
        |rowsYearAndMonth|

        "/ yyyymmdd
        rowsYearAndMonth := (row at:dateColIdx) copyTo:6.
        (prevYearAndMonth notNil and:[rowsYearAndMonth ~= prevYearAndMonth]) ifTrue:[
            "a new month starts here: emit the finished one"
            outRows add:{ prevYearAndMonth . (sum / countDaysPerMonth) }.
            sum := countDaysPerMonth := 0.
        ].
        prevYearAndMonth := rowsYearAndMonth.
        sum := sum + (Number readFrom:(row at:valColIdx)).
        countDaysPerMonth := countDaysPerMonth + 1.
    ].
    "emit the group which was still being collected when the rows ran out"
    countDaysPerMonth > 0 ifTrue:[
        outRows add:{ prevYearAndMonth . (sum / countDaysPerMonth) }.
    ].

    ^ self class
        rows:outRows columnNames:{ 'MESS_DATUM' . 'Mean per month of ',colName }
        tableName:('Mean Per Month of ',colName)
"
((self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerMonthOfColumnNamed:'TXK')
plot
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 19:06:36 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:24:34 / Claus Gittinger"
!
meanPerYearOfColumnNamed:colName
    "answer a new table which contains the arithmetic mean per year of the column
     called colName. The result has two columns: 'MESS_DATUM' (the first 4 characters
     of the date, i.e. yyyy for yyyymmdd dates) and 'Mean_Per_Year_<colName>'.

     Consecutive rows with the same yyyy prefix in the 'MESS_DATUM' column form
     one group, so the rows are expected to be ordered by date.
     Every row contributes to the mean of its group (including the first row of a
     year), and the last group is emitted as well.
     The values are converted with Number readFrom:, i.e. they are expected to be strings"

    |outRows valColIdx dateColIdx prevYear sum countDaysPerYear|

    dateColIdx := self indexOfColumnNamed:'MESS_DATUM'.
    valColIdx := self indexOfColumnNamed:colName.

    outRows := OrderedCollection new.
    prevYear := nil.
    sum := countDaysPerYear := 0.
    rowData do:[:row |
        |rowsYear|

        "/ yyyymmdd
        rowsYear := (row at:dateColIdx) copyTo:4.
        (prevYear notNil and:[rowsYear ~= prevYear]) ifTrue:[
            "a new year starts here: emit the finished one"
            outRows add:{ prevYear . (sum / countDaysPerYear) }.
            sum := countDaysPerYear := 0.
        ].
        prevYear := rowsYear.
        sum := sum + (Number readFrom:(row at:valColIdx)).
        countDaysPerYear := countDaysPerYear + 1.
    ].
    "emit the group which was still being collected when the rows ran out"
    countDaysPerYear > 0 ifTrue:[
        outRows add:{ prevYear . (sum / countDaysPerYear) }.
    ].

    ^ self class
        rows:outRows columnNames:{ 'MESS_DATUM' . 'Mean_Per_Year_',colName }
        tableName:('Mean Per Year of ',colName)
"
((self new
readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
meanPerYearOfColumnNamed:'TXK')
plot
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 19:40:34 / Claus Gittinger"
"Modified: / 06-01-2019 / 22:34:06 / Claus Gittinger"
! !
!TableData methodsFor:'helpers'!
indexOfColumnNamed:name
    "answer the position of the column called name;
     raise an error ('no such column') if there is none"

    |colIdx|

    colIdx := columnNames indexOf:name.
    colIdx == 0 ifTrue:[
        ^ self error:'no such column'
    ].
    ^ colIdx
"
self new
readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
indexOfColumnNamed:'MESS_DATUM'.
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 13:20:47 / Claus Gittinger"
"Modified (comment): / 30-05-2019 / 11:33:23 / Claus Gittinger"
! !
!TableData methodsFor:'instance creation'!
rows:rowDataArg columnNames:columnNamesArg
    "set the receiver's rows and column names; the table name is left as it is"

    columnNames := columnNamesArg.
    rowData := rowDataArg.
"Created: / 06-01-2019 / 13:26:01 / Claus Gittinger"
!
rows:rowDataArg columnNames:columnNamesArg tableName:tableNameArg
    "set the receiver's rows, column names and table name"

    self rows:rowDataArg columnNames:columnNamesArg.
    tableName := tableNameArg.
"Created: / 06-01-2019 / 19:25:10 / Claus Gittinger"
! !
!TableData methodsFor:'plotting'!
plot
"plot the whole table without a plot specification (see plot: and plot:file:)"
self plot:nil
"
|data tmax|
data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }} .
"
"Created: / 06-01-2019 / 18:47:27 / Claus Gittinger"
"Modified: / 06-01-2019 / 21:32:22 / Claus Gittinger"
!
plot:optionalSpec
    "write the receiver as CSV into a temporary file, plot that file (see plot:file:)
     and remove the temporary file afterwards, even if plotting fails.
     optionalSpec is either nil, or describes the columns to plot
     (see the example below and plot:file:)"

    |tmpFile|

    [
        tmpFile := Filename newTemporary.
        tmpFile writingFileDo:[:s |
            self writeCSVToStream:s.
        ].
        "/ FileBrowserV2 openOn:tmpFile.
        self plot:optionalSpec file:tmpFile.
    ] ensure:[
        "only remove what was really created; otherwise a failure in the cleanup
         would hide the error which made us come here"
        (tmpFile notNil and:[tmpFile exists]) ifTrue:[
            tmpFile delete
        ].
    ].
"
|data tmax|
data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }} .
"
"Created: / 06-01-2019 / 21:32:09 / Claus Gittinger"
"Modified: / 09-01-2019 / 18:40:16 / Claus Gittinger"
!
plot:optionalSpec file:fileName
    "plot the semicolon separated CSV file named fileName, by generating an R script,
     running it via the 'r' command and opening an inspector on the resulting picture.
     The picture (1200 x 400) is always written to '/tmp/plot3.png'.

     optionalSpec is nil, a Dictionary, or a collection of associations with the keys
        #x  - name of the column used as x axis
        #y  - collection of column names to plot; the first one is drawn with plot(),
              the remaining ones are added with lines().
     Without a spec, R plots the whole data frame.
     The table name (if any) is used as the plot's title.
     The temporary script file is removed afterwards, even if running R fails."

    |script scriptFile outFile width height spec xCol yCols|

    scriptFile := Filename newTemporary .
    outFile := '/tmp/plot3.png'.
    "start with an empty picture file"
    outFile asFilename contents:''.

    width := 1200.
    height := 400.

    optionalSpec notNil ifTrue:[
        spec := optionalSpec isDictionary
                    ifTrue:[optionalSpec]
                    ifFalse:[Dictionary withAssociations:optionalSpec].
        yCols := spec at:#y.
        xCol := spec at:#x.
        "the whole concatenation below is the receiver of the final bindWith:...
         (%1 = picture file, %2 = width, %3 = height, %4 = csv file, %5 = title).
         NOTE(review): the doubled percent in the inner plot() template is presumably
         what lets %5 survive the inner bindWith: - confirm against bindWith: semantics"
        script := '
png(filename="%1", height=%3, width=%2)
Data <- read.csv(file="%4", header=TRUE, sep=";")
',xCol,' <- Data$',xCol,'
',((yCols collect:[:col | col,' <- Data$',col]) asStringWith:Character cr),
"/'plot(',xCol,', '
"/ ,((yCols collect:[:col | col,', ']) asStringWith:''),
"/' type="l", main = "%5")'
'
',('plot(%1, %2, type="b", col=2, main = "%%5")' bindWith:xCol with:yCols first),'
',((yCols from:2 collect:[:ycol | 'lines(%1, %2, type="l", col=3)' bindWith:xCol with:ycol]) asStringWith:Character cr)
,'
# dev.off()
'
            bindWith:outFile
            with:width
            with:height
            with:fileName asFilename pathName
            with:(tableName ? '').
        "/ self halt.
    ] ifFalse:[
        script := '
# library(ggplot2)
# library(ggfortify)
png(filename="%1", height=%3, width=%2)
Data <- read.csv(file="%4", header=TRUE, sep=";")
# theme_set(theme_classic())
# Plot
# autoplot(Data) +
# labs(title="Data") +
# theme(plot.title = element_text(hjust=0.1))
plot(Data, type="l", main = "%5")
# plot(Data, cex = .4)
dev.off()
'
            bindWith:outFile
            with:width
            with:height
            with:fileName asFilename pathName
            with:(tableName ? '').
    ].

    [
        scriptFile contents:script.
        OSProcess new
            command:('r --silent --vanilla --slave -f %1' bindWith:scriptFile);
            execute.
    ] ensure:[
        "do not leave the generated script behind.
         NOTE(review): this relies on execute returning only after R has finished -
         the picture is read right below, which suggests so; confirm"
        scriptFile exists ifTrue:[
            scriptFile delete
        ].
    ].
    "/ self halt.
    "/ outFile asFilename exists.
    ((Smalltalk classNamed:#Image) fromFile:outFile asFilename) inspect.
"
|data tmax|
data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
tmax tableName:'Feldberg'.
tmax plot:{ #x -> 'MESS_DATUM' . #y -> { 'TXK' . 'TNK' }}.
"
"Created: / 06-01-2019 / 21:32:43 / Claus Gittinger"
"Modified: / 30-05-2019 / 11:28:42 / Claus Gittinger"
!
plotFile:fileName
"plot the CSV file named fileName without a plot specification (see plot:file:).
 The receiver's rows are not used for this; only its table name ends up in the plot"
self plot:nil file:fileName
"
|data tmax|
data := self fromFile:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK').
tmax tableName:'Test'.
tmax plot.
"
"Created: / 06-01-2019 / 15:59:47 / Claus Gittinger"
"Modified: / 06-01-2019 / 21:32:59 / Claus Gittinger"
!
writeCSVToStream:aStream
    "save myself as CSV onto a stream: one line with the column names,
     followed by one line per row; fields are separated by semicolons.
     A field which itself contains a semicolon is enclosed in double quotes
     (embedded double quotes are not escaped).

     Cell values are written with displayString, so that strings appear without
     the enclosing single quotes which printString would add; this way,
     what is written here can be read back with readCSV:"

    |putField|

    putField := [:text |
        (text includes:$;) ifTrue:[
            aStream nextPut:$"; nextPutAll:text; nextPut:$".
        ] ifFalse:[
            aStream nextPutAll:text
        ]
    ].

    columnNames
        do:[:nm | putField value:nm]
        separatedBy:[aStream nextPut:$;].
    aStream cr.

    rowData do:[:row |
        row
            do:[:col | putField value:col displayString]
            separatedBy:[aStream nextPut:$;].
        aStream cr.
    ].
"
|data tmax|
data := self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
tmax := data withColumnsNamed:#('MESS_DATUM' 'TXK' 'TNK').
String streamContents:[:s | tmax writeCSVToStream:s]
"
"Created: / 09-01-2019 / 17:48:25 / Claus Gittinger"
! !
!TableData methodsFor:'processing'!
removeColumn:index
    "destructively remove the column at position index,
     both from the column names and from every row"

    |remainingNames|

    remainingNames := columnNames copyWithoutIndex:index.
    columnNames := remainingNames.
    rowData := rowData collect:[:eachRow | eachRow copyWithoutIndex:index].
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
removeColumn:1;
inspect.
"
"Created: / 06-01-2019 / 13:05:41 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:13:50 / Claus Gittinger"
!
removeColumnNamed:name
    "destructively remove the column called name.
     Raises an error (via indexOfColumnNamed:) if there is no such column"

    |colIdx|

    colIdx := self indexOfColumnNamed:name.
    self removeColumn:colIdx
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
removeColumnNamed:'STATIONS_ID';
removeColumnNamed:'QN_3';
removeColumnNamed:'FX';
removeColumnNamed:'FM';
removeColumnNamed:'RSK';
removeColumnNamed:'RSKF';
removeColumnNamed:'SDK';
removeColumnNamed:'SHK_TAG';
removeColumnNamed:'NM';
removeColumnNamed:'VPM';
removeColumnNamed:'PM';
removeColumnNamed:'TMK';
removeColumnNamed:'UPM';
removeColumnNamed:'eor';
inspect.
"
"/ StringCollection('MESS_DATUM' 'QN_3' 'FX' 'FM' 'QN_4' 'RSK' 'RSKF' 'SDK' 'SHK_TAG' 'NM' 'VPM' 'PM' 'TMK' 'UPM' 'TXK' 'TNK' 'TGK' 'eor')
"Created: / 06-01-2019 / 13:06:50 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:13:53 / Claus Gittinger"
!
withColumns:indexCollection
    "answer a new TableData instance which contains only the columns at the
     positions listed in indexCollection, in the listed order.
     The new table is created without a table name"

    |selectedNames selectedRows|

    selectedNames := indexCollection collect:[:colIdx | columnNames at:colIdx] as:Array.
    selectedRows := self getColumns:indexCollection.
    ^ self class rows:selectedRows columnNames:selectedNames
"
(self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
withColumns:#(1 2 3).
"
"Created: / 06-01-2019 / 14:35:11 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:13:57 / Claus Gittinger"
!
withColumnsNamed:nameCollection
    "answer a new TableData instance which contains only the columns called
     as listed in nameCollection, in the listed order"

    |colIndices|

    colIndices := nameCollection collect:[:eachName | self indexOfColumnNamed:eachName].
    ^ self withColumns:colIndices
"
(self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
withColumnsNamed:#('MESS_DATUM' 'TXK').
"
"Created: / 06-01-2019 / 14:36:16 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:13:45 / Claus Gittinger"
!
withoutColumn:index
    "answer a new TableData instance which contains all columns except the one
     at position index. The receiver is not changed"

    |remainingRows remainingNames|

    remainingRows := rowData collect:[:eachRow | eachRow copyWithoutIndex:index].
    remainingNames := columnNames copyWithoutIndex:index.
    ^ self class rows:remainingRows columnNames:remainingNames
"
self new
readCSV:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
withoutColumn:1.
"
"Created: / 06-01-2019 / 13:27:11 / Claus Gittinger"
"Modified (comment): / 10-01-2019 / 16:25:56 / Claus Gittinger"
!
withoutColumns:indexCollection
    "answer a new TableData instance which contains all columns except those
     at the positions listed in indexCollection. The receiver is not changed"

    |remainingColIndices|

    remainingColIndices := (1 to:columnNames size) asNewOrderedCollection.
    remainingColIndices removeAll:indexCollection.
    ^ self withColumns:remainingColIndices
"
(self fromFile:'~/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt')
withoutColumns:#(1 2 3).
"
"Created: / 06-01-2019 / 14:32:08 / Claus Gittinger"
"Modified (comment): / 09-01-2019 / 12:14:29 / Claus Gittinger"
! !
!TableData methodsFor:'reading'!
readCSV:filename
    "fill the receiver from the CSV file named filename,
     using the semicolon as field separator"

    |semicolon|

    semicolon := $;.
    ^ self readCSV:filename separator:semicolon
"
self new
readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
inspect.
"
"Created: / 09-01-2019 / 12:07:27 / Claus Gittinger"
!
readCSV:filename separator:separatorCharacter
    "fill the receiver from the file named filename.
     The first line holds the column names; every further line is one row.
     Fields are separated by separatorCharacter; leading and trailing whitespace
     is removed from names and values. All values are kept as strings.

     An empty file gives an empty table (no columns, no rows).
     Blank lines are skipped, as they do not hold any field and would otherwise
     end up as empty rows, which break every later access by column index.
     Quoted fields are not supported"

    |headLine dataLine ls row rows|

    rows := OrderedCollection new.
    filename asFilename readingFileDo:[:s |
        headLine := s nextLine.
        headLine isNil ifTrue:[
            "nothing to read at all"
            columnNames := #().
        ] ifFalse:[
            columnNames := (headLine splitBy:separatorCharacter) collect:#withoutSeparators.
            [s atEnd] whileFalse:[
                dataLine := s nextLine.
                (dataLine notNil and:[dataLine withoutSeparators isEmpty not]) ifTrue:[
                    ls := dataLine readStream.
                    row := OrderedCollection new:columnNames size.
                    [ls atEnd] whileFalse:[
                        ls skipSeparators.
                        row add:(ls upTo:separatorCharacter) withoutSeparators.
                    ].
                    rows add:(row asArray).
                ].
            ].
        ].
    ].
    rowData := rows.
"
self new
readCSV:'/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt';
inspect.
"
"Created: / 09-01-2019 / 12:11:11 / Claus Gittinger"
!
readDemoCSV
    "fill the receiver from the author's demo file (a hard-coded, machine specific path)"

    |demoFile|

    demoFile := '/Users/cg/Documents/klima/data/DWD/daily/kl/historical/produkt_klima_tag_19450101_20171231_01346.txt'.
    self readCSV:demoFile.
"Created: / 09-01-2019 / 12:07:52 / Claus Gittinger"
! !
!TableData class methodsFor:'documentation'!
version_CVS
"answer the revision string; the $Header$ keyword below looks like a CVS keyword placeholder,
 presumably expanded on checkout, so it is left exactly as it is"
^ '$Header$'
! !