diff options
| -rw-r--r-- | indeedwatcher.nimble | 2 | ||||
| -rw-r--r-- | src/indeedwatcher.nim | 69 | 
2 files changed, 34 insertions, 37 deletions
| diff --git a/indeedwatcher.nimble b/indeedwatcher.nimble index c647a88..cf69199 100644 --- a/indeedwatcher.nimble +++ b/indeedwatcher.nimble @@ -1,6 +1,6 @@  # Package -version       = "1.0.4" +version       = "1.0.5"  author        = "msglm"  description   = "Watches indeed for job updates."  license       = "AGPL-3.0-only" diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index bc52138..8c4a71e 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -6,6 +6,7 @@ import os  import strutils  import parsetoml  import sequtils +import uri  if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):      createDir(getConfigDir() & "/indeedwatcher/") @@ -53,7 +54,8 @@ echo "connecting"  #TODO make the port configurable, some users may have something running here  let driver = newWebDriver("http://localhost:9515")  var session: Session -var counter = 0 +var feedcounter = 0 +var urlcounter = 0  #Behavior when CTRL+C  proc terminate() {.noconv.} =  @@ -70,7 +72,10 @@ setControlCHook(terminate)  for feed in feeds:      session = driver.createSession() -    sleep 3000 +    if feedcounter > 3: +        echo "resting for 20 seconds ..." +        sleep 20000 +        feedcounter = 0      #Getting the listing URLs from the feeds      var rssFeedReply: RSS      for attempt in countup(0,3): @@ -87,20 +92,24 @@ for feed in feeds:      for entry in rssFeedReply.items:          #Sleep so indeed.com doesn't freak out -        if counter > 7: +        if urlcounter > 7:              echo "resting for 10 seconds ..."              sleep 10000 -            counter = 0 +            urlcounter = 0 -        #Don't even bother visiting it if its in the cache +        #Don't even bother visiting it if its in the cache or hits a trigger word          var URL = entry.link -        let URLID = entry.link.split('&')[3] +        let queries = URL.parseUri.query.decodeQuery().toSeq() +        let jobName = queries[0].value +        let employer = queries[1].value +        let location = queries[2].value +        let URLID = queries[3].value          #This isn't cache.readFile().contains(URLID)          #because nim has no way to both open a file in append mode          #and also open it as reading. Therefore, this blunder, which          #creates a new file in memory, is used instead. -        if not readFile(cachefileloc).contains(URLID): +        if not readFile(cachefileloc).contains(URLID) or not any(titleblacklist, proc (input: string): bool = jobName.contains(input)):              for attempt in countup(0,3):                  try:                      echo "Telling chromium to navigate to " & URL @@ -112,21 +121,12 @@ for feed in feeds:                      else:                          raise                  break -            counter = counter + 1 +            urlcounter = urlcounter + 1              #HTML Parser              echo "Beginning to parse..." -            let jobTitle = session.findElement(".jobsearch-JobInfoHeader-title").get().getText()              let fullDesc = session.findElement("#jobDescriptionText").get().getText() -            var employer: string -            try: -                #This takes the location from the URL, removes all the junk around it, and replaced the URL pluses with actual spaces -                #perhaps, a URL parsing library could have been used for this. -                employer = entry.link.split('&')[1][2..^1].replace("+"," ") -            except UnpackDefect: -                employer = "None Listed" -                          var salaryInfoAndJobType: string              try:                  salaryInfoAndJobType = session.findelement("#salaryInfoAndJobType").get().gettext() @@ -134,26 +134,23 @@ for feed in feeds:                  salaryInfoAndJobType = "None Listed"              echo "Finishing the parse..." -            #Filtering -            if not any(titleblacklist, proc (input: string): bool = jobTitle.contains(input)): -                 echo "Beginning to write to file..." -                 #Output -                 var output = """ -                 Title: $1 -                 Company: $2 -                 Salary Info and Job Type: $3 -                 URL : $4 -                 Description: -                 $5 -                 """ % [jobTitle, employer, salaryInfoAndJobType, URL, fullDesc] -                 writeFile(outdir & jobTitle.replace("/") & ".txt", output) -                 echo "Wrote job to file!" -                 cache.writeLine(URL) -                 echo "Wrote listing to cache!" -            else: -                echo "Trigger was hit, discarding " & URL +            echo "Beginning to write to file..." +            #Output +            var output = """ +            Title: $1 +            Employer: $2 +            Location: $3 +            Salary Info and Job Type: $4 +            URL : $5 +            Description: +            $6 +            """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc] +            writeFile(outdir & jobName.replace("/") & ".txt", output) +            echo "Wrote job to file!" +            cache.writeLine(URL) +            echo "Wrote listing to cache!"          else: -            echo URL & " was in cache, discarding" +            echo URL & " was in cache or hit a trigger word, discarding"      session.close() | 
