Nixdae Nixdae - 1 month ago 9
R Question

How to read out a CET/CEST time series from Excel with R when DST daylight saving time is cause of repeated timestamps?

I have the follwing Excel inputs from an electricity provider...


  1. column: Excel's time differences between values in 2. column

  2. column: Datetimes given by the Swiss electricity provider obviously in CET/CEST format

  3. column: datetimes in Excel format

  4. column: the resulting datetimes from column 3

  5. column and following: values




0.010416667; 25.10.2015 00:00; 42302.00; 25.10.2015 00:00; 1353500.886
0.010416667; 25.10.2015 00:15; 42302.01; 25.10.2015 00:15; 1358036.992
0.010416667; 25.10.2015 00:30; 42302.02; 25.10.2015 00:30; 1336141.202
0.010416667; 25.10.2015 00:45; 42302.03; 25.10.2015 00:45; 1339588.722
0.010416667; 25.10.2015 01:00; 42302.04; 25.10.2015 01:00; 1309298.517
0.010416667; 25.10.2015 01:15; 42302.05; 25.10.2015 01:15; 1312979.406
0.010416667; 25.10.2015 01:30; 42302.06; 25.10.2015 01:30; 1263912.381
0.010416667; 25.10.2015 01:45; 42302.07; 25.10.2015 01:45; 1263811.518
0.010416667; 25.10.2015 02:00; 42302.08; 25.10.2015 02:00; 1232290.791
0.010416667; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1258868.764
0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1237087.155
0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1231009.94
0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1234285.809
-0.03125; 25.10.2015 02:15; 42302.09; 25.10.2015 02:15; 1220952.562
0.010416667; 25.10.2015 02:30; 42302.10; 25.10.2015 02:30; 1184250.718
0.010416667; 25.10.2015 02:45; 42302.11; 25.10.2015 02:45; 1169521.089
0.010416667; 25.10.2015 03:00; 42302.13; 25.10.2015 03:00; 1149841.273
0.010416667; 25.10.2015 03:15; 42302.14; 25.10.2015 03:15; 1157376.444
0.010416667; 25.10.2015 03:30; 42302.15; 25.10.2015 03:30; 1159568.082
0.010416667; 25.10.2015 03:45; 42302.16; 25.10.2015 03:45; 1176631.296
0.010416667; 25.10.2015 04:00; 42302.17; 25.10.2015 04:00; 1170186.402
0.010416667; 25.10.2015 04:15; 42302.18; 25.10.2015 04:15; 1174296.3
0.010416667; 25.10.2015 04:30; 42302.19; 25.10.2015 04:30; 1170491.442
0.010416667; 25.10.2015 04:45; 42302.20; 25.10.2015 04:45; 1168962.826
0.010416667; 25.10.2015 05:00; 42302.21; 25.10.2015 05:00; 1155760.463
0.010416667; 25.10.2015 05:15; 42302.22; 25.10.2015 05:15; 1184186.085
0.010416667; 25.10.2015 05:30; 42302.23; 25.10.2015 05:30; 1188887.774


Full Excel data can be downloaded from Swissgrid homepage: Swissgrid Data

The data is recorded during time shift of daylight saving.
I read out the data with following code:

read_SwissgridExcel_v2015 <- function(path,sheet=3){
wb1 <- read_excel(path, sheet=sheet, col_names=TRUE, skip=1)
names(wb1)[1] <-"Col_1"
wb1$Col_1 <- as.POSIXct(round.POSIXt(wb1$Col_1),tz="Europe/Berlin",usetz=TRUE)
SwissgridDat <- wb1
attr(SwissgridDat,"description")<-names(SwissgridDat)
names(SwissgridDat) <- c("Col_1",
"Cons_Total_Enduse_Swiss_ControlBlock",
"Prod_Total_Swiss_ControlBlock",
"Cons_Total_Swiss_ControlBlock",
"Net_Outflow_Swiss_Transm_Grid",
"Grid_FeedIn_Swiss_Transm_Grid",
"Control_Energy_Pos_Sec",
"Control_Energy_Neg_Sec",
"Control_Energy_Pos_Ter",
"Control_Energy_Neg_Ter",
"Cross_Border_Exchange_CH_AT",
"Cross_Border_Exchange_AT_CH",
"Cross_Border_Exchange_CH_DE",
"Cross_Border_Exchange_DE_CH",
"Cross_Border_Exchange_CH_FR",
"Cross_Border_Exchange_FR_CH",
"Cross_Border_Exchange_CH_IT",
"Cross_Border_Exchange_IT_CH",
"Transit",
"Import",
"Export",
"Prices_Avg_Pos_Sec_Control_Energy",
"Prices_Avg_Neg_Sec_Control_Energy",
"Prices_Avg_Pos_Ter_Control_Energy",
"Prices_Avg_Neg_Ter_Control_Energy",
"Prod_Canton_AG",
"Cons_Canton_AG",
"Prod_Canton_FR",
"Cons_Canton_FR",
"Prod_Canton_GL",
"Cons_Canton_GL",
"Prod_Canton_GR",
"Cons_Canton_GR",
"Prod_Canton_LU",
"Cons_Canton_LU",
"Prod_Canton_NE",
"Cons_Canton_NE",
"Prod_Canton_SO",
"Cons_Canton_SO",
"Prod_Canton_SG",
"Cons_Canton_SG",
"Prod_Canton_TI",
"Cons_Canton_TI",
"Prod_Canton_TG",
"Cons_Canton_TG",
"Prod_Canton_VS",
"Cons_Canton_VS",
"Prod_Canton_AI_AR",
"Cons_Canton_AI_AR",
"Prod_Canton_BL_BS",
"Cons_Canton_BL_BS",
"Prod_Canton_BE_JU",
"Cons_Canton_BE_JU",
"Prod_Canton_SZ_ZG",
"Cons_Canton_SZ_ZG",
"Prod_Canton_OW_NW_UR",
"Cons_Canton_OW_NW_UR",
"Prod_Canton_GE_VD",
"Cons_Canton_GE_VD",
"Prod_Canton_SH_ZH",
"Cons_Canton_SH_ZH",
"Prod_Cantons",
"Cons_Cantons",
"Prod_Foreign_Territories_SwissControlZone",
"Cons_Foreign_Territories_SwissControlZone"
)
SwissgridDat <- SwissgridDat %>% transform(SelfProdEndUse=pmin(Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock))
SwissgridDat <- SwissgridDat %>% transform(ImportEndUse= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock,Cons_Total_Enduse_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0))
SwissgridDat <- SwissgridDat %>% transform(ImportNonEndUsed= ifelse(Cons_Total_Enduse_Swiss_ControlBlock>Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0) +
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Prod_Total_Swiss_ControlBlock,0)
)
SwissgridDat <- SwissgridDat %>% transform(Export2= ifelse(Prod_Total_Swiss_ControlBlock >= Cons_Total_Swiss_ControlBlock,
Prod_Total_Swiss_ControlBlock-Cons_Total_Swiss_ControlBlock,0)
)

SwissgridDat <- SwissgridDat %>% transform(SelfProdOwnUse=
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock<=Cons_Total_Swiss_ControlBlock,
Prod_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)+
ifelse(Cons_Total_Enduse_Swiss_ControlBlock<Prod_Total_Swiss_ControlBlock &
Prod_Total_Swiss_ControlBlock>Cons_Total_Swiss_ControlBlock,
Cons_Total_Swiss_ControlBlock-Cons_Total_Enduse_Swiss_ControlBlock,0)
)
return(SwissgridDat)
}
DataPath <- getwd()
SwissgridDat <- read_SwissgridExcel_v2015(paste(DataPath, "/EnergieUebersichtCH_2015.xlsx", sep = ""))


Printing out the timestamps with following

SwissgridDat$Col_1[28510:28525]


results in

[1] "2015-10-25 00:30:00 CEST" "2015-10-25 00:45:00 CEST" "2015-10-25 01:00:00 CEST" "2015-10-25 01:15:00 CEST" "2015-10-25 01:30:00 CEST" "2015-10-25 01:45:00 CEST"
[7] "2015-10-25 02:00:00 CET" "2015-10-25 02:15:00 CET" "2015-10-25 02:30:00 CET" "2015-10-25 02:45:00 CET" "2015-10-25 03:00:00 CET" "2015-10-25 02:15:00 CET"
[13] "2015-10-25 02:30:00 CET" "2015-10-25 02:45:00 CET" "2015-10-25 03:00:00 CET" "2015-10-25 03:15:00 CET"


Problem: as you can see the shift hour is both printed in CET time, although the logic says that the first hour 02:00 to 03:00 has to be CEST and then the second hour from 02:15 to 03:00 in CET format.

Did I do something wrong or is this a bug? How would you do the readout?

My goal is to transform everything in UTC anyway, but data is unfortunately not in UTC format.

Thx a lot for your help on this.

Answer

I thought of a simpler approach since you want UTC only:

start_posix <- as.POSIXct("2014-01-01 00:00:00", tz="Europe/Zurich") #this is your first time stamp change as appropriate (e.g SwissgridDat$Col_1[1])
attr(start_posix, "tzone")<-"UTC"
len_out<-nrow(SwissgridDat$Col_1) 
len_out<-1000 # remove this line in your case
seq.POSIXt(from=start_posix,by=15*60,length.out = len_out)

That will give you a nice regular 15min UTC timestamp as long as no data is missing

EDIT:

Here's just the correction for the DLS hours. Feel free to generalize to several years of data, if needed...

Test data:

25.10.2015 00:00;42302
25.10.2015 00:15;42302.01
25.10.2015 00:30;42302.02
25.10.2015 00:45;42302.03
25.10.2015 01:00;42302.04
25.10.2015 01:15;42302.05
25.10.2015 01:30;42302.06
25.10.2015 01:45;42302.07
25.10.2015 02:00;42302.08
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 02:15;42302.09
25.10.2015 02:30;42302.1
25.10.2015 02:45;42302.11
25.10.2015 03:00;42302.13
25.10.2015 03:15;42302.14
25.10.2015 03:30;42302.15
25.10.2015 03:45;42302.16
25.10.2015 04:00;42302.17
25.10.2015 04:15;42302.18
25.10.2015 04:30;42302.19
25.10.2015 04:45;42302.2
25.10.2015 05:00;42302.21
25.10.2015 05:15;42302.22
25.10.2015 05:30;42302.23

Code:

data <- read.delim("test.csv", sep=";", dec=".", header =F, col.names = c("time", "somedata"))
data$time <- as.POSIXct(data$time, "%d.%m.%Y %H:%M", tz="Europe/Zurich")
target <- which(duplicated(data$time))
len_out <- length(target)*2-1 #need to run over duplicated and originals
last_target <- max(target)

for (i in seq(0,len_out)){

  diff_increment <- as.numeric(difftime(data$time[last_target-i+2],data$time[last_target-i+1], units="secs")) #get correct increment from a little further out
  data$time[last_target-i]<-data$time[last_target-i+1]-diff_increment
}

This snippet will correct the day light saving hours in your data after the read-in.