runjumpfly runjumpfly - 1 month ago 6
R Question

Merging two xts to create NAs at certain time stamps

I have an irregular xts series where I want to include NAs at proper minutes eg.g 09:15:00, 09:16:00 and so on if these proper minutes are missing there. I am doing it through creating another series that has NA series with theses timestamps.
This is my series:

dput(data1)
structure(c(108.4, 108.35, 108.35, 108.4, 108.2, 108.35, 108.25,
108.25, 108.25, 108.25, 108.25, 108.25, 108.35, 108.3, 108.25,
108.3, 108.25, 108.25, 108.2, 108.15, 108.2, 108.1, 108.25, 108.25,
108.1, 108.2, 108.25, 108.15, 108.15, 108.1, 108.2, 108.1, 108.25,
108.3, 108.25, 108.2, 108.2, 108.2, 108.1, 108.1, 108.1, 108.1,
108.1, 108.15, 108.15, 108.05, 108.1, 108, 108, 108, 108.1, 108.05,
108, 108.05, 108, 108.05, 107.9, 107.95, 107.85, 107.85, 107.85,
107.85, 107.9, 107.9, 107.9, 108, 107.85, 107.9, 107.9, 107.9,
108, 107.9, 108, 108, 108, 107.95, 107.95, 108, 108, 108, 108,
107.95, 108, 108, 108, 108.05, 108.05, 108.1, 108.05, 108.1,
108.1, 108.1, 108.1, 108.1, 108.15, 108.05, 108.1, 108.05, 108.05,
108.05), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "Asia/Calcutta", tclass = c("POSIXct",
"POSIXt"), tzone = "Asia/Calcutta", index = structure(c(1459481851,
1459482301, 1459482303, 1459482304, 1459482305, 1459482306, 1459482307,
1459482308, 1459482309, 1459482310, 1459482311, 1459482312, 1459482313,
1459482314, 1459482315, 1459482316, 1459482317, 1459482318, 1459482319,
1459482320, 1459482321, 1459482322, 1459482323, 1459482324, 1459482325,
1459482326, 1459482327, 1459482328, 1459482329, 1459482330, 1459482331,
1459482332, 1459482333, 1459482334, 1459482335, 1459482336, 1459482337,
1459482339, 1459482340, 1459482341, 1459482342, 1459482343, 1459482344,
1459482345, 1459482347, 1459482348, 1459482349, 1459482350, 1459482351,
1459482352, 1459482353, 1459482354, 1459482355, 1459482356, 1459482357,
1459482358, 1459482360, 1459482361, 1459482362, 1459482363, 1459482364,
1459482365, 1459482366, 1459482367, 1459482368, 1459482369, 1459482370,
1459482371, 1459482372, 1459482374, 1459482375, 1459482376, 1459482377,
1459482378, 1459482379, 1459482380, 1459482381, 1459482382, 1459482384,
1459482385, 1459482386, 1459482387, 1459482388, 1459482389, 1459482390,
1459482392, 1459482393, 1459482394, 1459482395, 1459482396, 1459482397,
1459482398, 1459482399, 1459482401, 1459482402, 1459482403, 1459482404,
1459482405, 1459482406, 1459482407), tzone = "Asia/Calcutta", tclass = c("POSIXct",
"POSIXt")), .Dim = c(100L, 1L), .Dimnames = list(NULL, "SYMBOL"), class = c("xts", "zoo"))


NA series:

dput(NA.df)
structure(c(NA, NA, NA), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = structure("Asia/Calcutta", .Names = "TZ"), tzone = structure("Asia/Calcutta", .Names = "TZ"), index = structure(c(1459741500,
1459741560, 1459741620), tzone = structure("Asia/Calcutta", .Names = "TZ"), tclass = c("POSIXct",
"POSIXt")), .Dim = c(3L, 1L), .Dimnames = list(NULL, "Empty"))


This is my code:

data2<-merge(data1,NA.df, all=TRUE)[,1:ncol(data1)]


This the result:

dput(data2)
structure(c(108.4, 108.35, 108.35, 108.4, 108.2, 108.35, 108.25,
108.25, 108.25, 108.25, 108.25, 108.25, 108.35, 108.3, 108.25,
108.3, 108.25, 108.25, 108.2, 108.15, 108.2, 108.1, 108.25, 108.25,
108.1, 108.2, 108.25, 108.15, 108.15, 108.1, 108.2, 108.1, 108.25,
108.3, 108.25, 108.2, 108.2, 108.2, 108.1, 108.1, 108.1, 108.1,
108.1, 108.15, 108.15, 108.05, 108.1, 108, 108, 108, 108.1, 108.05,
108, 108.05, 108, 108.05, 107.9, 107.95, 107.85, 107.85, 107.85,
107.85, 107.9, 107.9, 107.9, 108, 107.85, 107.9, 107.9, 107.9,
108, 107.9, 108, 108, 108, 107.95, 107.95, 108, 108, 108, 108,
107.95, 108, 108, 108, 108.05, 108.05, 108.1, 108.05, 108.1,
108.1, 108.1, 108.1, 108.1, 108.15, 108.05, 108.1, 108.05, 108.05,
108.05, NA, NA, NA), index = structure(c(1459481851, 1459482301,
1459482303, 1459482304, 1459482305, 1459482306, 1459482307, 1459482308,
1459482309, 1459482310, 1459482311, 1459482312, 1459482313, 1459482314,
1459482315, 1459482316, 1459482317, 1459482318, 1459482319, 1459482320,
1459482321, 1459482322, 1459482323, 1459482324, 1459482325, 1459482326,
1459482327, 1459482328, 1459482329, 1459482330, 1459482331, 1459482332,
1459482333, 1459482334, 1459482335, 1459482336, 1459482337, 1459482339,
1459482340, 1459482341, 1459482342, 1459482343, 1459482344, 1459482345,
1459482347, 1459482348, 1459482349, 1459482350, 1459482351, 1459482352,
1459482353, 1459482354, 1459482355, 1459482356, 1459482357, 1459482358,
1459482360, 1459482361, 1459482362, 1459482363, 1459482364, 1459482365,
1459482366, 1459482367, 1459482368, 1459482369, 1459482370, 1459482371,
1459482372, 1459482374, 1459482375, 1459482376, 1459482377, 1459482378,
1459482379, 1459482380, 1459482381, 1459482382, 1459482384, 1459482385,
1459482386, 1459482387, 1459482388, 1459482389, 1459482390, 1459482392,
1459482393, 1459482394, 1459482395, 1459482396, 1459482397, 1459482398,
1459482399, 1459482401, 1459482402, 1459482403, 1459482404, 1459482405,
1459482406, 1459482407, 1459516500, 1459516560, 1459516620), tzone = "Asia/Calcutta", tclass = c("POSIXct",
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct",
"POSIXt"), .indexTZ = "Asia/Calcutta", tclass = c("POSIXct",
"POSIXt"), tzone = "Asia/Calcutta", .Dim = c(103L, 1L), .Dimnames = list(
NULL, "SYMBOL"))


But it does not has NA at 9:15:, 9:16:00, and so on. Please help me to find out where I am mistaking in my code. Thanks

Answer

Your output is this:

> data2<-merge(data1, NA.df)
> tail(data2)
                    SYMBOL Empty
2016-04-01 09:16:45 108.05    NA
2016-04-01 09:16:46 108.05    NA
2016-04-01 09:16:47 108.05    NA
2016-04-04 09:15:00     NA    NA
2016-04-04 09:16:00     NA    NA
2016-04-04 09:17:00     NA    NA

Since you want NA's in NA.df, to see whats happening clearly, try this, which places by default 0 in the rows which do not match SYMBOL on Empty, instead of the default NA:

> data2<-merge(data1, NA.df, fill = 0)
> tail(data2)
                    SYMBOL Empty
2016-04-01 09:16:45 108.05     0
2016-04-01 09:16:46 108.05     0
2016-04-01 09:16:47 108.05     0
2016-04-04 09:15:00   0.00    NA
2016-04-04 09:16:00   0.00    NA
2016-04-04 09:17:00   0.00    NA

I think what you want is probably to have the NA's in the same column. To do that, you need to have the same column name (SYMBOL here), and use rbind, not merge (merge will create two columns just like cbind, as it thinks they are distinct series, even though the column name is identical).

colnames(NA.df) <- "SYMBOL"
data2<-rbind(data1, NA.df)

> tail(data2, 6)
SYMBOL
2016-04-01 09:16:45 108.05
2016-04-01 09:16:46 108.05
2016-04-01 09:16:47 108.05
2016-04-04 09:15:00     NA
2016-04-04 09:16:00     NA
2016-04-04 09:17:00     NA

Your NA.df frame as the wrong timestamps for what you want to do.

index(NA.df) <- seq(from = as.POSIXct("2016-04-01 09:15:00", tz = "Asia/Calcutta"), length.out = 3, by = "secs")
data2<-rbind(data1, NA.df)
> head(data2, 10)
                    SYMBOL
2016-04-01 09:07:31 108.40
2016-04-01 09:15:00     NA
2016-04-01 09:15:01 108.35
2016-04-01 09:15:01     NA
2016-04-01 09:15:02     NA
2016-04-01 09:15:03 108.35
2016-04-01 09:15:04 108.40
2016-04-01 09:15:05 108.20
2016-04-01 09:15:06 108.35
2016-04-01 09:15:07 108.25