# The session locale has to be switched to Chinese before the rest of the
# script runs.
Sys.setlocale(category = "LC_ALL", locale = "Chinese")
# Alternatives that were tried and left disabled:
# Sys.setlocale(, 'English')
# options("encoding" = "native.enc")
# options("encoding" = "UTF-8")

# All output files are written relative to this directory.
setwd("C:/Users/User/Pictures/sexpage")

# library(audio)
library(rvest)
library(crayon)

# Custom crayon style for console output.
ligSilver <- make_style("#889988")

# --- Scrape configuration ----------------------------------------------------
# A page URL is built as: pageHeader + <page number> + pageTail.
pageHeader <- "http://freematuresgallery.com/?page="
# pageTail <- "/"
pageTail <- ""
# CSS selector of the nodes extracted from every page.
className <- ".th" # pornpic
titleName <- "freematuresgallery"
totalPages <- 27
# Name of the final HTML file produced at the end of the script.
theFilename <- paste0("freematuresgallery.photo.", titleName, ".html")

# --- Accumulators and page list ----------------------------------------------
allLinks <- character(0)
wholePage <- character(0)
# Page numbers to visit: 0, 1, ..., totalPages.
addr <- 0:totalPages
lentocpage <- length(addr)
cat("\nlentocpage: ", lentocpage, "\n")

# Remember when the run started so elapsed/expected times can be reported.
ProcessStartTime <- Sys.time()
cat(format(Sys.time(), "%H:%M:%OS"), "\n")
# Format a duration given in seconds as "<days> day HH:MM:SS".
#
# Args:
#   t: numeric vector of durations in seconds. Fractional seconds are
#      discarded (floored) before formatting.
#
# Returns:
#   A character vector with one element per element of `t`,
#   e.g. dhms(3661) gives "0 day 01:01:01".
dhms <- function(t) {
  # Work on whole seconds. formatC(format = "d") rounds doubles, so without
  # this an input such as 59.7 would be shown with a seconds field of "60".
  t <- floor(t)

  # Zero-padded two-digit field.
  pad2 <- function(x) formatC(x, width = 2, format = "d", flag = "0")

  clock <- paste(
    pad2(t %/% (60 * 60) %% 24), # hours within the day
    pad2(t %/% 60 %% 60),        # minutes within the hour
    pad2(t %% 60),               # seconds within the minute
    sep = ":"
  )
  paste(t %/% (60 * 60 * 24), "day", clock)
}
# Visit every page listed in `addr`, pull the nodes matching `className`, keep
# the ones that reference "http://archivegalleries", rewrite their image
# markup and append the result to `allLinks`.
#
# seq_along(addr) is used on purpose: the earlier `1:length(addr)-1` parsed as
# `(1:length(addr)) - 1`, so the first pass indexed addr[0] (an empty page
# number in the URL) and the last entry of `addr` was never requested.
for (i in seq_along(addr)) {
  cat(i, "/", length(addr), " ")
  #guess_encoding(pagesource)
  #pagesource <- read_html(paste0(pageHeader,addr[i],pageTail), encoding = "UTF-8")
  url <- paste0(pageHeader, addr[i], pageTail)
  cat(url, "\n")

  pagesource <- read_html(url)
  itemList <- html_nodes(pagesource, className)
  itemList <- as.character(itemList)

  # Keep only the entries that point at an archive gallery.
  itemListIdx <- grep("http://archivegalleries", itemList)
  itemList <- itemList[itemListIdx]

  # Make thumbnail paths absolute and convert the images to lazy-loaded ones.
  itemList <- gsub('/newpics', 'http://freematuresgallery.com/newpics', itemList)
  itemList <- gsub('img src', 'img class="lazy" data-src', itemList)
  # NOTE(review): the search pattern below is an empty string, which looks
  # like text lost from the original source (possibly an HTML tag). As
  # written it inserts a newline at every character position - confirm the
  # intended pattern before relying on the output.
  itemList <- gsub('', '\n', itemList)

  allLinks <- c(allLinks, itemList)

  # After ten pages, extrapolate the total run time from the average so far.
  if (i == 10) {
    ProcessEndTime <- Sys.time()
    LoopTime <- as.numeric(ProcessEndTime - ProcessStartTime, units = "secs")
    ecTime <- length(addr) * LoopTime / 10
    cat(red(
      "\n\n Expect to complete at: ", as.character(ProcessStartTime + ecTime), "\n",
      "per cycle time: ", dhms(LoopTime / 10), "\n",
      "Expected total time: ", dhms(ecTime), "\n\n"
    ))
  }
}
cat(red("\ncollect links complete!\n"))

# Store the collected entries in sorted order, one per line, in
# "<titleName>Links.html" inside the working directory.
allLinks <- sort(allLinks)
cat(allLinks, file = paste0(titleName, "Links.html"), sep = "\n")
cat(yellow("\nallLinks file saved!\n"))

# Announce the next phase and record when it started.
cat(yellow("\n\ncollecting images...\n"))
ProcessStartTime1 <- Sys.time()
## collect all imgs
#for(i in 1:length(allLinks)){
# cat(i, "/", length(allLinks), " ")
#
# url = allLinks[i]
#
# cat(url, "\n")
# pagesource <- read_html(url)
#
# itemList <- html_nodes(pagesource, className)
# itemList = as.character(itemList)
# #writeClipboard(itemList)
#
# itemListIdx = grep('href="//sexhd.pics', itemList)
# itemList = itemList[itemListIdx]
#
# itemList = gsub('
\n')
wholePage = c(wholePage, imgUrl)
}
}
# writeClipboard(wholePage)

# Load the page skeleton; the "mom50" placeholder in the head template is
# replaced by this run's title.
templateHead <- gsub("mom50", titleName, readLines("templateHead.txt"))
templateTail <- readLines("templateTail.txt")

# Write head, collected content and tail to the output file, in that order.
# The first call creates/overwrites the file; the following ones append.
cat(templateHead, file = theFilename, sep = "\n")
cat(wholePage, file = theFilename, sep = "\n", append = TRUE)
cat(templateTail, file = theFilename, sep = "\n", append = TRUE)

# Report the finishing time and the whole-second duration of the run.
ProcessEndTime <- Sys.time()
cat(format(Sys.time(), "%H:%M:%OS"), "\n")
LoopTime <- trunc(as.numeric(ProcessEndTime - ProcessStartTime, units = "secs"))
cat("Task completed! loop time: ", dhms(LoopTime), "\n\n\n")
cat(theFilename, "created\n")