# Exploratory summary of a tab-separated cache request log ("cache.txt").
# Each expression below is meant to be run at top level so that its value is
# auto-printed. Columns referenced: response_size, hashed_host_path, uri_query,
# http_method, content_type, x_cache, time_firstbyte.

library(data.table)
library(ggplot2)
library(scales)
library(doParallel)

# read.table() is kept on purpose so quoting/comment-character parsing stays
# exactly as before; setDT() converts the result in place instead of copying.
dt <- setDT(read.table("cache.txt", header = TRUE, sep = "\t"))

# fraction of requests with a zero response size
dt[response_size == 0, .N] / nrow(dt)

# requests with a non-empty query string
dt[uri_query != ""]

# request count per HTTP method
dt[, .N, by = http_method]

# share of requests per content type (rounded to 3 decimals)
dt[, round(.N / nrow(dt), 3), by = content_type]

## unique objects for *all* requests
# one row per object; V1 = number of requests for that object
tmp <- dt[, .(V1 = .N), by = hashed_host_path]
# total requests
nrow(dt)
# unique object count
nrow(tmp)
# objects with only 1 request (one-hit-wonders)
tmp[V1 == 1L, .N]

## unique objects for *cp4006-front* requests
# cache1 = 7th space-separated token of x_cache.
# Extracted row by row: rows with fewer than 7 tokens get NA instead of a
# recycled (wrong) token, which is what rbind() on ragged splits would produce.
# NOTE(review): assumes the 7th token is the front cache host -- confirm
# against the x_cache format.
dt[, cache1 := vapply(
  strsplit(as.character(x_cache), " ", fixed = TRUE),
  `[`,
  character(1),
  7L
)]
tmp.cp4006 <- dt[cache1 == "cp4006", .(V1 = .N), by = hashed_host_path]
# total requests
tmp.cp4006[, sum(as.numeric(V1))]
# unique object count
nrow(tmp.cp4006)
# objects with only 1 request (one-hit-wonders)
tmp.cp4006[V1 == 1L, .N]

## time to first byte
dt[, min(time_firstbyte) * 1000] # ms
dt[, max(time_firstbyte)] # s

# quick look at object sizes
dt[, mean(response_size) / 1024] # KB
dt[, median(response_size) / 1024] # KB
dt[, min(response_size) / 1024] # KB
dt[, max(response_size) / 1024 / 1024] # MB

# response volume per cache1 value, in GiB (bytes / 2^30);
# as.numeric() avoids integer overflow when summing sizes
dt[, sum(as.numeric(response_size)) / 2^30, by = cache1]