How to create a model from my data on Kaggle

#tutorial #ai #python

Step 1: Prepare our data

1. Define a function for searching images

import os
# Read KAGGLE_KERNEL_RUN_TYPE from the environment; falls back to '' (falsy) when unset.
# NOTE(review): presumably this variable is only present inside Kaggle kernels — confirm.
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

if iskaggle:
    # `!` is the IPython/Jupyter shell escape, so this line only works in a notebook.
    # Quietly (-qq) install/upgrade (-U) fastai and duckduckgo_search (>= 6.2).
    !pip install -Uqq fastai 'duckduckgo_search>=6.2'

from duckduckgo_search import DDGS
from fastcore.all import *
import time, json
def search_images(keywords, max_images=200):
    """Return the image URLs of a DuckDuckGo image search.

    Runs an image search for `keywords`, asking for at most `max_images`
    results, and collects the 'image' entry of every result into a
    fastcore `L`.
    """
    results = DDGS().images(keywords, max_results=max_images)
    return L(results).itemgot('image')

2. Search for a dog photo and get its URL from the search results

# A single hit is enough here: only one sample picture is wanted.
urls = search_images(keywords='dog photos', max_images=1)

3. Download the image and take a look at it

from fastdownload import download_url

# File name the sample dog picture is stored under.
dest = 'dog.jpg'
# Fetch the first search hit into `dest`, with the progress bar switched off.
download_url(urls[0], dest=dest, show_progress=False)

from fastai.vision.all import *

# Open the file saved under `dest`; the trailing expression makes the
# notebook display a 256x256-bounded thumbnail of it.
im = Image.open(dest)
im.to_thumb(256, 256)

4. Do the same thing for a cat photo

# Same preview as for the dog, this time for a single cat picture.
cat_urls = search_images('cat photos', max_images=1)
download_url(cat_urls[0], 'cat.jpg', show_progress=False)
Image.open('cat.jpg').to_thumb(256, 256)

5. Grab a few examples of both dog and cat photos, and save each group of photos to a different folder

# One search term per class; every class gets its own sub-folder of `path`.
searches = ('dog', 'cat')
path = Path('dog_or_not')

for category in searches:
    # Create dog_or_not/<category>, including missing parents; no error if it exists.
    dest = path / category
    dest.mkdir(parents=True, exist_ok=True)

    photo_urls = search_images(f'{category} photo')
    download_images(dest, urls=photo_urls)
    # Wait five seconds before continuing.
    # NOTE(review): presumably this spaces out the searches — confirm.
    time.sleep(5)
    # Resize the photos with max_size=400, writing them back into the same folder.
    resize_images(dest, max_size=400, dest=dest)

6. Remove any photos that were not downloaded correctly, since they would cause our model training to fail

# Gather every image file under `path`, keep the ones that fail verification,
# and delete those files from disk.
image_files = get_image_files(path)
failed = verify_images(image_files)
failed.map(Path.unlink)

Step 2: Train our model

1. Create the DataLoaders using a DataBlock

# Describe how the files under `path` become labelled training data.
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),               # image inputs, category targets
    get_items=get_image_files,                        # items are the image files of the source
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # random 20% validation split, fixed seed
    get_y=parent_label,                               # label taken from the parent folder
    item_tfms=[Resize(192, method='squish')],         # resize every item to 192 using 'squish'
)
# Build the loaders from `path` with a batch size of 32, then preview up to 6 samples.
dls = dblock.dataloaders(path, bs=32)
dls.show_batch(max_n=6)

2. Use a pretrained model and fine-tune it on our dataset

# Build a learner around resnet50 that reports error_rate, then fine-tune it for 3 epochs.
learn = vision_learner(dls, arch=resnet50, metrics=error_rate)
learn.fine_tune(epochs=3)

Step 3: Use our model

1. Use the dog photo that we downloaded at the start to see what our model thinks about it

# `predict` returns the decoded label, its index, and the per-class probabilities.
is_dog,_,probs = learn.predict(PILImage.create('dog.jpg'))
# Look up where 'dog' sits in the vocab instead of hard-coding the index:
# the position depends on how the class names are ordered, so a fixed number
# would silently report the wrong class if the categories changed.
dog_idx = learn.dls.vocab.o2i['dog']
print(f'This is a: {is_dog}.')
print(f"Probability it's a dog: {probs[dog_idx]:.4f}")

This is a: dog.
Probability it's a dog: 1.0000

DEV Community

How to create a model from my data on Kaggle

Top comments (0)