diff options
| -rwxr-xr-x | comedyGenerator | 249 | 
1 files changed, 112 insertions, 137 deletions
| diff --git a/comedyGenerator b/comedyGenerator index f281ed2..5b28284 100755 --- a/comedyGenerator +++ b/comedyGenerator @@ -6,118 +6,95 @@ import os  import argparse  import time  import hashlib +import json  parser = argparse.ArgumentParser(add_help=True)  parser.add_argument('--source', -                    default=False, -                    action='store_true', -                    dest='source', -                    help='Links to the source code of the software' -                    ) +        default=False, +        action='store_true', +        dest='source', +        help='Links to the source code of the software' +        )  parser.add_argument('--debug', -                    default=False, -                    action='store_true', -                    dest='debug', -                    help='Enables debug output' -                    ) - -parser.add_argument('-js','-JS',"--Javascript",'--javascript', -                    default=True, -                    action='store_true', -                    dest='usingJavascript', -                    help='Enables the user of a webdriver to scrape funnies' -                    ) +        default=False, +        action='store_true', +        dest='debug', +        help='Enables debug output' +        )  parser.add_argument('--amount','-a', -                    default=0, -                    dest='amount', -                    action="store", -                    help="the amount of funnies you'd like to download per tag", -                    type=int -                    ) +        default=0, +        dest='amount', +        action="store", +        help="the amount of funnies you'd like to download per tag", +        type=int +        )  parser.add_argument('tags', -                    nargs='+', -                    type=str, -                    help='Provides tags to be check for funny downloading' -                    ) +        nargs='+', +        type=str, +        help='Provides tags to be check for funny downloading' +        )  args = parser.parse_args()  if args.source:      print("https://git.snootgame.xyz/PrincipalSpears/comedyGenerator") - -if not args.usingJavascript: -    from bs4 import BeautifulSoup -    print("using html mode...") -    for tags in args.tags: -        URL = requests.get('https://ifunny.co/tags/' + tags + '?filter=video') -        webpage = BeautifulSoup(URL.content, 'html.parser') -        videos = webpage.findAll('video') -        if args.amount > 0: -            videos = video[:arg.amount] -        for video in videos: -            name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4" -            print("saving " + video['data-src'] + " as " + name) -            urllib.request.urlretrieve(video['data-src'], '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)  -elif args.usingJavascript: -    from selenium import webdriver -    from selenium.webdriver.common.by import By -    print("using JS mode...") -     -    try: -        if args.debug: -            print("testing if firefox works...") -        from selenium.webdriver.firefox.options import Options -        firefox_options = Options() -        firefox_options.add_argument("--headless") -        driver = webdriver.Firefox(options=firefox_options) -        if args.debug: -            print("Firefox Works!") - -    except: -            if args.debug: -                print("Firefox didn't work! Trying Chrome!") -            try: -                from selenium.webdriver.chrome.options import Options -                chrome_options = Options() -                #chrome_options.add_argument("--headless") #TODO completely breaks script and screenshot shows a white screen and nothing but. likely got discovered. -                driver = webdriver.Chrome(options=chrome_options) -                if args.debug: -                    print("Chrome Works!") -            except: -                if args.debug: -                    print("Chrome Failed! Going to attempt an install of the firefox webdriver") -                    try: -                        from selenium.webdriver.firefox.options import Options -                        firefox_options = Options() -                        firefox_options.add_argument("--headless") -                        driver = webdriver.Firefox(executable_path=GeckoDriverManager().install(), options=firefox_options) -                        if args.debug: -                             print("Install successful! using Firefox!") -                    except: -                        if args.debug: -                            print("Install Failed! Trying Chrome webdriver install!") -                            try: -                                from selenium.webdriver.chrome.options import Options -                                chrome_options = Options() -                                chrome_options.add_argument("--headless") -                                driver = webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install(), options=chrome_options) -                            except: -                                    print("Could not find webdriver!") -                                    print("You'll have to manually install a webdriver to your path") -                                    print("If you are using GNU/Linux, it is likely that you can install from your standard repos. Debian labels their chromium driver chromium-driver. If you wish to use an ungoogled version of chromium (as to reduce possiblity of spying), you can find a link to that here: https://github.com/Eloston/ungoogled-chromium#downloads. On Debian (or debian likes such as Ubuntu or Devuan), you may then run apt install ungoogled-chromium-driver and this will no longer fail.") -                                    sys.exit() -    for tags in args.tags: +    sys.exit(0) + +headers = { +        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0", +        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", +        "Accept-Language": "en-US,en;q=0.5", +        "DNT": "1", +        "Connection": "keep-alive", +        "Upgrade-Insecure-Requests": "1", +        "Sec-Fetch-Dest": "document", +        "Sec-Fetch-Mode": "navigate", +        "Sec-Fetch-Site": "cross-site", +        "Cache-Control": "max-age=0" +        } + +videos = [] +for tags in args.tags:          if args.debug:              print("Downloading Tag: " + tags) +        headers = { +                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0", +                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", +                "Accept-Language": "en-US,en;q=0.5", +                "DNT": "1", +                "Connection": "keep-alive", +                "Upgrade-Insecure-Requests": "1", +                "Sec-Fetch-Dest": "document", +                "Sec-Fetch-Mode": "navigate", +                "Sec-Fetch-Site": "cross-site", +                "Cache-Control": "max-age=0" +            } + +        master = requests.get('https://ifunny.co/', headers=headers) +        combineHeader = (dict(master.headers)|headers) +        requestHeader = { +                "User-Agent":combineHeader['User-Agent'], +                "Content-Type":combineHeader['Content-Type'], +                "x-requested-with": "fetch", +                "x-csrf-token": combineHeader['Set-Cookie'].split(';')[0].split('=')[1], +                "set-cookies":combineHeader['Set-Cookie'], +                "access-control-allow-headers":combineHeader['access-control-allow-headers'] +                } +        requestCookies = { +                "CID" : combineHeader['Set-Cookie'].split(';')[3].split('=')[2], +                "sound" : "off", +                "viewMode" : "list", +                "x-csrf-token": combineHeader['Set-Cookie'].split(';')[0].split('=')[1] +                }          for tries in range(100):              try: -                driver.get('https://ifunny.co/tags/' + tags + '?filter=video') +                tagPage = requests.get("https://ifunny.co/api/v1/feeds?filter=video&tag=" + tags, headers=requestHeader, cookies=requestCookies)                  if args.debug:                      print("Got Webpage!")              except: @@ -127,65 +104,63 @@ elif args.usingJavascript:                      continue              break -        if args.amount > 0: -            if args.debug: -                print("starting to scroll...") -            isTimesScrolled = 0 -            oughtTimeScrolled = (args.amount/10) + 1 -            while isTimesScrolled < int(oughtTimeScrolled): -                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") -                isTimesScrolled = isTimesScrolled + 1 -                time.sleep(3) -                if args.debug: -                    print("Need to scroll " + str(oughtTimeScrolled-isTimesScrolled) + " more times...") -                videos = driver.find_elements(By.TAG_NAME,"video") -            if args.debug: -                print("Original Videos List: ") -                print(videos) -        if args.amount > 0: +        JSONDump = tagPage.json() +        while len(videos) < args.amount: +            print("Currently have " + str(len(videos)) + " videos out of " + str(args.amount) + " (" + str((len(videos)/args.amount)*100) + "%)") +            for item in range(len(JSONDump['items'])): +                videos.append(JSONDump['items'][item]['url']) +            for tries in range(100): +                try: +                    tagPage = requests.get("https://ifunny.co/api/v1/feeds?filter=video&tag=" + tags + "&next=" + JSONDump['pagination']['next'], headers=requestHeader, cookies=requestCookies) +                    JSONDump = tagPage.json() +                    if args.debug: +                        print("Got New Tag Page!") +                    break +                except: +                    if tries < 100 - 1: +                        print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!") +                        time.sleep(tries*1.5) +                        continue +        if len(videos) > args.amount:              videos = videos[:args.amount]              if args.debug:                  print("Videos list truncated! Its now: " + str(len(videos)) + " units long")          for video in videos:                          print("Now running for " + str(video)) -            URL = video.get_attribute("data-src") -                 -            if isinstance(URL, str): + +            if isinstance(video, str):                  if args.debug: -                    print("URL read as: " + URL) -                name = tags + "-" + hashlib.md5(URL.encode('utf-8')).hexdigest() + ".mp4" +                    print("URL read as: " + video) +                name = tags + "-" + hashlib.md5(video.encode('utf-8')).hexdigest() + ".mp4" +                path = '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name                  if args.debug:                      print("name read as: " + name) -                    print("saving " + video.get_attribute("data-src") + " as " + name) -                for tries in range(100): -                    try: -                        urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)  -                    except: +                    print("saving " + video + " as " + name) +                if os.path.exists(path): +                    print(name + " already exists!") +                else: +                    for tries in range(100): +                        try: +                            urllib.request.urlretrieve(video, path)  +                        except:                              if tries < 100 - 1:                                  print("Rate Limited! Sleeping for " + str(tries*1.5) + " seconds!")                                  time.sleep(tries*1.5) -                                continue -                    break -                    #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)  +                            continue +                        break +                #urllib.request.urlretrieve(video.get_attribute("data-src"), '/home/' + os.environ['USER'] + '/Videos/unsorted/' + name)               elif args.debug: -                print("URL is NOT a string, it is a " + str(type(URL))) - - +                print("URL is NOT a string, it is a " + str(type(video))) -    driver.quit() -else: -    print("Neither Javascript or HTML was given!") -    sys.exit(1) -     -#    This program is free software: you can redistribute it and/or modify -#    it under the terms of the GNU Affero General Public License version 3 as published by -#    the Free Software Foundation. +#This program is free software: you can redistribute it and/or modify +#it under the terms of the GNU Affero General Public License version 3 as published by +#the Free Software Foundation.  # -#    This program is distributed in the hope that it will be useful, -#    but WITHOUT ANY WARRANTY; without even the implied warranty of -#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -#    GNU Affero General Public License for more details. +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#GNU Affero General Public License for more details.  # -#    You should have received a copy of the GNU Affero General Public License -#    along with this program.  If not, see <https://www.gnu.org/licenses/>. +#You should have received a copy of the GNU Affero General Public License +#along with this program.  If not, see <https://www.gnu.org/licenses/>. | 
