<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: MAK KA WAI</title>
    <description>The latest articles on DEV Community by MAK KA WAI (@makkawai).</description>
    <link>https://dev.to/makkawai</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1872671%2F5bc5cc5c-b258-42fd-a576-6764ea9bc4e2.png</url>
      <title>DEV Community: MAK KA WAI</title>
      <link>https://dev.to/makkawai</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/makkawai"/>
    <language>en</language>
    <item>
      <title>Date: 29/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Thu, 29 Aug 2024 14:10:13 +0000</pubDate>
      <link>https://dev.to/makkawai/date-29082024-2m24</link>
      <guid>https://dev.to/makkawai/date-29082024-2m24</guid>
      <description>&lt;p&gt;I added the chat bot to my Application&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Date: 28/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Thu, 29 Aug 2024 14:08:42 +0000</pubDate>
      <link>https://dev.to/makkawai/date-28082024-ee5</link>
      <guid>https://dev.to/makkawai/date-28082024-ee5</guid>
      <description>&lt;p&gt;I applied for the google search API. Maybe useful later.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fux4c8ck961l2j44bpbid.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fux4c8ck961l2j44bpbid.png" alt="Image description" width="800" height="293"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I made a button for scrolling. After a while, I found that it is useless.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ftbu376aoz2o73xg2q3ve.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ftbu376aoz2o73xg2q3ve.png" alt="Image description" width="322" height="930"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 27/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Tue, 27 Aug 2024 15:11:37 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-27082024-325m</link>
      <guid>https://dev.to/makkawai/daily-log-27082024-325m</guid>
      <description>&lt;p&gt;I integrated the function into my prototype. &lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F4d1jdn0m5pvn5u6s6egd.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F4d1jdn0m5pvn5u6s6egd.png" alt="Image description" width="800" height="815"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 26/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Mon, 26 Aug 2024 12:53:50 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-26082024-4k0e</link>
      <guid>https://dev.to/makkawai/daily-log-26082024-4k0e</guid>
      <description>&lt;p&gt;Today, I made a text area for &lt;strong&gt;negative prompt&lt;/strong&gt; to filter unwanted job(s). &lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 25/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Mon, 26 Aug 2024 12:53:17 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-25082024-3ak</link>
      <guid>https://dev.to/makkawai/daily-log-25082024-3ak</guid>
      <description>&lt;p&gt;I made a &lt;strong&gt;dropdown list&lt;/strong&gt; to &lt;strong&gt;filter&lt;/strong&gt; out unwanted job regions.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fhnsmi5zujzm74zyod9sr.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fhnsmi5zujzm74zyod9sr.png" alt="Image description" width="800" height="544"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 24/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Sat, 24 Aug 2024 14:44:18 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-24082024-2kic</link>
      <guid>https://dev.to/makkawai/daily-log-24082024-2kic</guid>
      <description>&lt;p&gt;My application can now search for appropriate jobs based on an uploaded resume.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F80afxn8ffovxzp12izsr.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F80afxn8ffovxzp12izsr.png" alt="Image description" width="800" height="641"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I ran the web crawler. Now there are 5108 jobs in ChromaDB.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2wol87col6xvrynovdng.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F2wol87col6xvrynovdng.png" alt="Image description" width="800" height="101"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;I found that the ‘fetch all jobs’ function causes the program to run out of RAM. &lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fhqe91ngmsarcn98kp13q.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fhqe91ngmsarcn98kp13q.png" alt="Image description" width="800" height="473"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 23/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Fri, 23 Aug 2024 15:20:55 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-23082024-15ad</link>
      <guid>https://dev.to/makkawai/daily-log-23082024-15ad</guid>
      <description>&lt;h2&gt;
  
  
  Overall
&lt;/h2&gt;

&lt;p&gt;I was developing my AI application.&lt;/p&gt;

&lt;h2&gt;
  
  
  Problem facing
&lt;/h2&gt;

&lt;p&gt;I found that Jinja conflicts with &lt;code&gt;jsonify()&lt;/code&gt;, resulting in errors when uploading files and chatting with GPT. My mentor helped me solve it.&lt;/p&gt;

&lt;h2&gt;
  
  
  Learn
&lt;/h2&gt;

&lt;p&gt;Since I wanted to extract all the company IDs from &lt;a href="https://www.ctgoodjobs.hk/" rel="noopener noreferrer"&gt;https://www.ctgoodjobs.hk/&lt;/a&gt;, I tried web crawling. Success.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;import requests
from bs4 import BeautifulSoup
import json
import parsel  # 第三方的模块

def main():
    """Scrape the company IDs shown on the ctgoodjobs "top companies" page.

    Downloads the page, reads the comma-separated IDs stored as text inside
    the ``ul.extra`` element, adds the ``data-company-id`` attribute of the
    first link in every ``.sub-sec li`` item, then prints the collected IDs
    followed by how many there are.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    }
    # Fetch the HTML of the "top companies" page.
    url = "https://www.ctgoodjobs.hk/top-companies"
    # The timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status() stops early instead of parsing an error page.
    response = requests.get(url=url, headers=headers, timeout=30)
    response.raise_for_status()
    selector = parsel.Selector(response.text)

    # .css(...) on its own yields every match as a list of selectors;
    # adding .get() yields only the first match as a string (None if no match).
    items = selector.css('.sub-sec li')
    # Without ::text the result would be a string with the &amp;lt;element&amp;gt; tag included.
    extra_data = selector.css('div.sub-sec ul.extra::text').get()

    # Tokenization: split the comma-separated text into IDs, skipping blanks.
    # extra_data is None when the page has no ul.extra element.
    company_ids = []
    if extra_data:
        company_ids = [token.strip() for token in extra_data.split(',') if token.strip()]

    for item in items:
        company_id = item.css('a::attr(data-company-id)').get()
        # Items without a data-company-id link would otherwise add None.
        if company_id is not None:
            company_ids.append(company_id)

    print(company_ids)
    print(len(company_ids))


# Run the scraper only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



</description>
    </item>
    <item>
      <title>Daily Log - 22/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Thu, 22 Aug 2024 14:30:07 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-22082024-5gkf</link>
      <guid>https://dev.to/makkawai/daily-log-22082024-5gkf</guid>
      <description>&lt;p&gt;I continued to develop my Application. I was making the html elements, some minor functions and python flask.&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 21/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Wed, 21 Aug 2024 15:08:56 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-21082024-1f3f</link>
      <guid>https://dev.to/makkawai/daily-log-21082024-1f3f</guid>
      <description>&lt;p&gt;I started to develop the first prototype of my application&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 20/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Wed, 21 Aug 2024 04:34:28 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-20082024-1h3d</link>
      <guid>https://dev.to/makkawai/daily-log-20082024-1h3d</guid>
      <description>&lt;p&gt;I tried to design my application and build my html template.&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 19/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Mon, 19 Aug 2024 13:19:57 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-19082024-55pc</link>
      <guid>https://dev.to/makkawai/daily-log-19082024-55pc</guid>
      <description>&lt;p&gt;I found a big problem. When I was reading the data extracted from ctgoodjobs.hk in Chroma DB, there was not a single record with a real salary value; all were N/A. However, there are jobs with a real salary value.&lt;/p&gt;

&lt;p&gt;e.g.&lt;br&gt;
Hash: deef24dd43347dfd6077827d6bdfa86d&lt;br&gt;
&lt;a href="https://jobs.ctgoodjobs.hk/job/08976889/licensing-officer-shatin-22k-transport-department" rel="noopener noreferrer"&gt;https://jobs.ctgoodjobs.hk/job/08976889/licensing-officer-shatin-22k-transport-department&lt;/a&gt; &lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fn4f7daw1axqa4bx3k2xp.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fn4f7daw1axqa4bx3k2xp.png" alt="Image description" width="800" height="778"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Salary has a real number&lt;/p&gt;

&lt;p&gt;From &lt;code&gt;streamlit run app.py&lt;/code&gt;&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fw13ivnnstqzpuh4mzip4.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fw13ivnnstqzpuh4mzip4.png" alt="Image description" width="800" height="665"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Salary has no real number&lt;/p&gt;

&lt;p&gt;I tried to run ‘crawler.py’ and ‘app.py’ again. &lt;br&gt;
Another problem occurred. The previous day, I thought ‘crawler.py’ and ‘app.py’ had run successfully; now I was worried they might not work. &lt;/p&gt;

&lt;p&gt;I didn’t understand the query. When I searched with a job title, there was no correct output.&lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fiyld6mhzqdmu4d0qbxuv.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fiyld6mhzqdmu4d0qbxuv.png" alt="Image description" width="800" height="813"&gt;&lt;/a&gt;&lt;/p&gt;



&lt;p&gt;To check whether records were successfully added to Chroma DB, I ran this program:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;# job_counter.py

import chromadb

def connect_to_chromadb():
    """Open and return the persistent ChromaDB client stored under ./job_posts."""
    # Make sure this path is the same as the one used by the main application.
    return chromadb.PersistentClient(path="./job_posts")

def count_jobs(client):
    """Return the number of records in the "jobs" collection of ``client``."""
    jobs = client.get_collection(name="jobs")
    return jobs.count()

# Script entry point: open the database, count the stored jobs, and print the total.
if __name__ == "__main__":
    client = connect_to_chromadb()
    total_jobs = count_jobs(client)
    print(f"Total number of jobs in the database: {total_jobs}")

&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;I ran it before and after running the crawler. The result was a success. &lt;/p&gt;




&lt;p&gt;After I modified the code of ‘crawler.py’, the salary is shown now. &lt;/p&gt;

&lt;p&gt;&lt;a href="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fkvvn0ripd1v3j818shrx.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media.dev.to/cdn-cgi/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fkvvn0ripd1v3j818shrx.png" alt="Image description" width="800" height="875"&gt;&lt;/a&gt;&lt;/p&gt;

</description>
    </item>
    <item>
      <title>Daily Log - 18/08/2024</title>
      <dc:creator>MAK KA WAI</dc:creator>
      <pubDate>Sun, 18 Aug 2024 11:53:43 +0000</pubDate>
      <link>https://dev.to/makkawai/daily-log-18082024-21m7</link>
      <guid>https://dev.to/makkawai/daily-log-18082024-21m7</guid>
      <description>&lt;p&gt;Today is Sunday. Rest!&lt;/p&gt;

</description>
    </item>
  </channel>
</rss>
