From da76dd91c48e6711327051adba7c61e8f09eab70 Mon Sep 17 00:00:00 2001
From: timothy
Date: Thu, 11 Nov 2021 16:04:00 +1030
Subject: [PATCH] Initial garbage

---
Notes:
    The blob ids on the index lines below were not regenerated after the
    script changes in this revision.

 webtoon-scraper.sh        | 76 +++++++++++++++++++++++++++++++++++++++
 webtoon2html-segmented.sh | 67 ++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100755 webtoon-scraper.sh
 create mode 100755 webtoon2html-segmented.sh

diff --git a/webtoon-scraper.sh b/webtoon-scraper.sh
new file mode 100755
index 0000000..a57899f
--- /dev/null
+++ b/webtoon-scraper.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# webtoon-scraper.sh - download every image of one webtoons.com episode.
+#
+# Usage: webtoon-scraper.sh EPISODE_URL
+#
+# Images are saved as TOON/temp/TOON_EEE_PPP.jpg, where EEE is the episode
+# number and PPP the picture number, each zero-padded to three digits. The
+# last picture of the episode is then moved to TOON/temp/logo/.
+#
+# Dependencies: cut grep mktemp wget curl
+# "TOON/error.log" will list all download errors
+
+if [ -z "$1" ]; then
+  echo "Usage: ${0##*/} EPISODE_URL" >&2
+  exit 2
+fi
+
+# Scratch files for the episode page and the image lines extracted from it.
+# The EXIT trap removes them on every exit path, including the error exits.
+workdir=$(mktemp -d "${TMPDIR:-/tmp}/${0##*/}.XXXXXXXXXX") || exit 1
+trap 'rm -rf -- "$workdir"' EXIT
+downloadtemp="$workdir/page.html"
+sortingtemp="$workdir/images.txt"
+
+# Follow given url to final destination
+url=$(curl -s -L -I -o /dev/null -w '%{url_effective}' "$1")
+
+toonname=$(printf '%s\n' "$url" | cut -d'/' -f6)
+episodeno=$(printf '%s\n' "$url" | cut -d'=' -f3)
+# eipsodename is only referenced by the disabled convert step at the bottom.
+# shellcheck disable=SC2034
+eipsodename=$(printf '%s\n' "$url" | cut -d'/' -f7)
+pictureno=0
+
+# An empty toon name would turn the paths below into "/temp" and "/error.log",
+# and printf %03d needs a plain number, so stop before touching the disk.
+if [ -z "$toonname" ] || ! [[ "$episodeno" =~ ^[0-9]+$ ]]; then
+  echo "Error: cannot read toon name and episode number from URL: $url" >&2
+  exit 1
+fi
+
+# 10# forces base 10, so an episode number such as "08" is not taken as octal.
+episodetag=$(printf '%03d' "$((10#$episodeno))")
+prefix="$toonname/temp/${toonname}_${episodetag}"
+
+# temp/logo must exist before the final mv, which has it as its destination.
+mkdir -p "$toonname/temp/logo"
+
+if ! wget --quiet -O "$downloadtemp" "$url"; then
+  echo -e "Error:\tURL: $url\t(404? This may not be an issue, check the output.)" >> "$toonname/error.log"
+  exit 1
+fi
+
+# Keep only the lines that carry these image-tag attributes.
+grep 'rel="nofollow" ondragstart="return false;" onselectstart="return false;" oncontextmenu="return false;"' "$downloadtemp" > "$sortingtemp"
+
+echo -e "\nChapter URL: $url"
+
+while IFS= read -r line; do
+  pictureno=$((pictureno+1))
+  # Cut out the individual image url: 12th "-delimited field, query string dropped
+  imageurl=$(printf '%s\n' "$line" | cut -d'"' -f12 | cut -d'?' -f1)
+  target="${prefix}_$(printf '%03d' "$pictureno").jpg"
+  wget --no-verbose --tries=6 --user-agent='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0' --referer="http://www.webtoons.com/" "$imageurl" -O "$target" || echo "URL: $imageurl Name:$target" >> "$toonname/error.log"
+done < "$sortingtemp"
+
+# Move the last image aside; it is expected to be the webtoon logo (not verified).
+# Nothing is moved when the page yielded no image lines.
+lastimage="${prefix}_$(printf '%03d' "$pictureno").jpg"
+if [ "$pictureno" -gt 0 ] && [ -f "$lastimage" ]; then
+  mv -- "$lastimage" "$toonname/temp/logo/"
+fi
+
+# Don't bother doing this, as this will often result in an image larger than ImageMagick can handle
+#convert -quality 95 -append "${prefix}*.jpg" "$toonname/${toonname}_${episodetag}_$eipsodename.jpg" || echo -e "convert.im6 error: Input: ${prefix}\*.jpg\tOutput: $toonname/${toonname}_${episodetag}_$eipsodename.jpg\tURL: $url" >> "$toonname/error.log"
diff --git a/webtoon2html-segmented.sh b/webtoon2html-segmented.sh
new file mode 100755
index 0000000..46d7ca8
--- /dev/null
+++ b/webtoon2html-segmented.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# webtoon2html-segmented.sh - write one TOON_EEE.htm page per episode from
+# the TOON_EEE_PPP.jpg images found in the current directory.
+#
+# NOTE(review): the header, image-entry and footer strings written below are
+# empty or plain text in this revision, so the pages contain no HTML markup.
+# Presumably tags were intended here - confirm and restore them.
+
+#indexentry() {
+#
+#}
+
+# Print the page header for the episode that $img belongs to.
+# Globals: img (read) - image file name of the form TOON_EEE_PPP.jpg
+# Outputs: the header text on stdout
+episode_headders() {
+  local title=${img%_*} # TOON_EEE: the file name minus its last _ field
+  printf '%s - WebToon2HTML\n\n%s\n\n\n\n' "$title" "$title"
+}
+
+# Walk the images in glob order and append one entry per image to its
+# episode page, starting a new page whenever the episode changes.
+main () {
+  local img page
+  local current_page=""
+
+  for img in *_???_???.jpg; do
+    # An unmatched glob stays literal, so skip anything that is not a file.
+    [ -f "$img" ] || continue
+    page="${img%_*}.htm"
+    if [ "$page" != "$current_page" ]; then
+      # This is the first image for the chapter, do the initial setup.
+      # Put next and back arrows here
+      # Close the previous page first; there is none before the first chapter.
+      if [ -n "$current_page" ]; then
+        echo '' >> "$current_page"
+      fi
+      episode_headders > "$page"
+      current_page=$page
+    fi
+    echo "" >> "$page"
+  done
+
+  # The last chapter has no successor to close it, so close it here.
+  if [ -n "$current_page" ]; then
+    echo '' >> "$current_page"
+  fi
+}
+
+main
+
+# Disabled alternative main, kept for reference:
+#main () {
+#  echo ''"${PWD##*/} - WebToon2HTML"''"
+#
+#${PWD##*/}
+#
+#
+#"
+#  for i in *.jpg; do
+#    echo ""
+#  done
+#  echo "
+#
+#"
+#}