john relee john relee - 22 days ago 5
R Question

Generate a histogram in R of an employee's hours

Below is my dataset containing an employee's attendance records:

date intime outtime
2 02/11/2015 10:21:27 17:58:12
3 03/11/2015 10:13:09 18:52:44
4 04/11/2015 10:11:52 18:40:36
5 05/11/2015 10:31:42 18:16:57
6 06/11/2015 10:13:13 18:36:15
10 10/11/2015 10:03:20 18:07:52
11 11/11/2015 09:40:20 18:42:20
12 12/11/2015 10:38:56 18:37:20
13 13/11/2015 10:45:26 18:09:54
16 16/11/2015 10:13:13 18:36:15
17 17/11/2015 10:11:43 18:36:15
18 18/11/2015 10:13:13 18:36:15
19 19/11/2015 10:13:13 18:36:15
20 20/11/2015 12:14:25 20:25:08
23 23/11/2015 10:08:08 17:57:35
24 24/11/2015 14:30:32 18:36:15


The total time served by the employee, in hours, is computed as follows:

# Sum, over all rows of `newdata`, the difference between the clamped
# clock-out and clock-in values: outtime is capped at "18:00:00" via pmin()
# and intime is raised to at least "08:00:00" via pmax().
# NOTE(review): `newdata` is not defined in this snippet, and its columns are
# compared against character strings -- confirm the column class supports
# pmin()/pmax() against strings and the `-` operator.
total_time <- with(newdata, sum(pmin(newdata$outtime, "18:00:00") -
pmax(newdata$intime, "08:00:00") ))
# floor() is applied before the multiplication by 24, so any fractional part
# of the summed value is discarded first.
# NOTE(review): presumably the sum is expressed in days and this converts
# whole days to hours -- confirm that dropping the partial day is intended.
total_time <- 24*floor(as.numeric(total_time))
"Total time served by employee is : 96 hours"


I want to generate a histogram for each employee showing the hours served on a monthly basis, with a total of 5 bins.

Answer

I changed the data so that we have info for more months (for a better histogram):

library(data.table)

# Attendance log: one row per working day with the date (dd/mm/yyyy) and the
# clock-in / clock-out times (HH:MM:SS). fread() treats a string containing
# newlines as the data itself rather than as a file path.
df <- fread("    date   intime  outtime
           02/11/2015 10:21:27 17:58:12
           03/11/2015 10:13:09 18:52:44
           04/11/2015 10:11:52 18:40:36
           05/11/2015 10:31:42 18:16:57
           06/11/2015 10:13:13 18:36:15
           10/11/2015 10:03:20 18:07:52
           11/11/2015 09:40:20 18:42:20
           12/11/2015 10:38:56 18:37:20
           13/11/2015 10:45:26 18:09:54
           16/11/2015 10:13:13 18:36:15
           17/11/2015 10:11:43 18:36:15
           18/11/2015 10:13:13 18:36:15
           19/11/2015 10:13:13 18:36:15
           20/11/2015 12:14:25 20:25:08
           23/11/2015 10:08:08 17:57:35
           24/11/2015 14:30:32 18:36:15")

# Convert both clock-time columns to POSIXct in one pass. Only a time of day
# is supplied, so the date part is filled in with the current day; that is
# harmless here because the times are only ever compared with each other.
df[, c("intime", "outtime") := lapply(.SD, as.POSIXct, format = "%H:%M:%S"),
   .SDcols = c("intime", "outtime")]

library(lubridate) # supplies dmy() and day()

# Day of the month, parsed from the dd/mm/yyyy date string.
df$day <- day(dmy(df$date))

# Bounds of the counted working window, built the same way as the intime /
# outtime columns so that they are directly comparable.
window_open <- as.POSIXct("08:00:00", format = "%H:%M:%S")
window_close <- as.POSIXct("18:00:00", format = "%H:%M:%S")

# Hours worked per day, counting only the time between 08:00 and 18:00:
# arrivals are clamped up to the window start, departures down to its end.
df$total_time <- as.numeric(
  difftime(
    pmin(df$outtime, window_close),
    pmax(df$intime, window_open),
    units = "hours"
  )
)

library(ggplot2)

# One bar per day of the month, bar height = hours worked that day.
# The heights are already computed, so this is a bar chart of given values:
# geom_col() draws them as-is. The previous
# geom_histogram(stat = "identity", bins = 5) did no binning at all, because
# the identity stat bypasses the binning stat and `bins` was ignored.
ggplot(df, aes(x = day, y = total_time)) +
  geom_col()

op


With just 5 bins (only 4 bins appear in the plot because there is no data after day 24):

# Fixed, sorted day-of-month bin edges giving five intervals:
# (0,5], (5,10], (10,15], (15,24], (24,31].
# The previous edges c(0, 5, 10, 15, 30, max(df$day)) were unsorted and
# depended on the data: they only worked because cut() sorts its breaks, and
# they fail with "'breaks' are not unique" whenever the latest day equals one
# of the fixed edges (5, 10, 15 or 30).
day_breaks <- c(0, 5, 10, 15, 24, 31)
df$breaks <- cut(df$day, breaks = day_breaks)

# Total hours per bin. Aggregating with data.table (already loaded; `df` comes
# from fread()) replaces the call to plyr's ddply(), which was never loaded.
# keyby returns the bins in factor-level order; bins without data are absent.
df1 <- df[, .(total_hr = sum(total_time)), keyby = breaks]

# The bar heights are precomputed totals, so draw them directly with
# geom_col() instead of geom_histogram(stat = "identity", ...), whose
# `bins` / `binwidth` arguments had no effect.
ggplot(df1, aes(x = breaks, y = total_hr)) +
  geom_col()

op