diff options
| author | msglm <msglm@techchud.xyz> | 2025-08-17 19:54:24 -0500 | 
|---|---|---|
| committer | msglm <msglm@techchud.xyz> | 2025-08-17 19:54:24 -0500 | 
| commit | 8d9354fe2ca505fe89d3971205a8dfa4ca02d603 (patch) | |
| tree | c4b56b225b8ed571fe12da483371b87fe6141877 /src | |
| parent | e6b78e264767eb2fabe2f6d9455137b879b1ecf3 (diff) | |
| download | indeed-watcher-8d9354fe2ca505fe89d3971205a8dfa4ca02d603.tar.gz indeed-watcher-8d9354fe2ca505fe89d3971205a8dfa4ca02d603.tar.bz2 indeed-watcher-8d9354fe2ca505fe89d3971205a8dfa4ca02d603.zip | |
added more ways to blacklist undesired content
Diffstat (limited to 'src')
| -rw-r--r-- | src/indeedwatcher.nim | 45 | 
1 files changed, 30 insertions, 15 deletions
| diff --git a/src/indeedwatcher.nim b/src/indeedwatcher.nim index 0fc8af4..a8d6bfc 100644 --- a/src/indeedwatcher.nim +++ b/src/indeedwatcher.nim @@ -9,6 +9,14 @@ import sequtils  import uri  import json +type +    indeedJobDesc = object +        URL: string +        jobName: string +        employer: string +        location: string +        URLID: string +  if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):      createDir(getConfigDir() & "/indeedwatcher/")      let defaultConfig = """ @@ -26,6 +34,8 @@ if not fileExists(getConfigDir() & "/indeedwatcher/config.toml"):      #Phrases that, if they appear, will cause the job to be instantly thrown out      [blacklist]      title= [\"Senior\", \"Sr.\"] +    employer= [\"NSA\"] +    location= [\"Ohio\"]      """      writeFile(getConfigDir() & "/indeedwatcher/config.toml", defaultConfig) @@ -48,6 +58,8 @@ let cache = open(cachefileloc, fmAppend)  ##Filtering section of config  let titleblacklist = config["blacklist"]["title"].getElems().mapIt(it.getStr()) +let employerblacklist = config["blacklist"]["employer"].getElems().mapIt(it.getStr()) +let locationblacklist = config["blacklist"]["location"].getElems().mapIt(it.getStr())  ##Does the user desire headlessness?  
var args: JsonNode
@@ -80,6 +92,14 @@ proc terminate() {.noconv.} =
     quit()
 setControlCHook(terminate)
+proc postValid(posting: indeedJobDesc) : bool =
+    ## Despite the name, returns true when the posting is blacklisted: a match
+    ## in ANY of the title, employer or location blacklists is enough, and the
+    ## caller (`not postValid(posting)`) then discards the posting.
+    return any(titleblacklist, proc (input: string): bool = posting.jobName.contains(input)) or
+           any(employerblacklist, proc (input: string): bool = posting.employer.contains(input)) or
+           any(locationblacklist, proc (input: string): bool = posting.location.contains(input))
+
 for feed in feeds:
     #let args = %*{"desiredCapabilities":{"browserName":"chromium"}}
     session = driver.createSession(args)
@@ -110,22 +130,19 @@ for feed in feeds:
             urlcounter = 0
         #Don't even bother visiting it if its in the cache or hits a trigger word
-        var URL = entry.link
-        let queries = URL.parseUri.query.decodeQuery().toSeq()
-        let jobName = queries[0].value
-        let employer = queries[1].value
-        let location = queries[2].value
-        let URLID = queries[3].value
+        let queries = entry.link.parseUri.query.decodeQuery().toSeq()
+        var posting: indeedJobDesc
+        posting = indeedJobDesc(URL: entry.link, jobName: queries[0].value, employer: queries[1].value, location: queries[2].value, URLID: queries[3].value)
         #This isn't cache.readFile().contains(URLID)
         #because nim has no way to both open a file in append mode
         #and also open it as reading. Therefore, this blunder, which
         #creates a new file in memory, is used instead.
-        if not readFile(cachefileloc).contains(URLID) and not any(titleblacklist, proc (input: string): bool = jobName.contains(input)): +        if not readFile(cachefileloc).contains(posting.URLID) and not postValid(posting):              for attempt in countup(0,3):                  try: -                    echo "Telling chromium to navigate to " & URL -                    session.navigate(URL) +                    echo "Telling chromium to navigate to " & posting.URL +                    session.navigate(posting.URL)                  except:                      if attempt < 3:                          echo "Recieved an error: trying again..." @@ -156,17 +173,15 @@ for feed in feeds:              URL : $5              Description:              $6 -            """ % [jobName, employer, location, salaryInfoAndJobType, URL, fullDesc] -            writeFile(outdir & jobName.replace("/") & ".txt", output) +            """ % [posting.jobName, posting.employer, posting.location, salaryInfoAndJobType, posting.URL, fullDesc] +            writeFile(outdir & posting.jobName.replace("/") & ".txt", output)              echo "Wrote job to file!" -            cache.writeLine(URL) +            cache.writeLine(posting.URL)              echo "Wrote listing to cache!"          else: -            echo URL & " was in cache or hit a trigger word, discarding" +            echo posting.URL & " was in cache or hit a trigger word, discarding"      session.close()  cache.close() -#session.close()  terminate(chromedriver) - | 
