I'm trying to figure out if it's possible to create categories using custom filters.
I am building an e-commerce app and have set up my category model using mptt. I am importing a CSV that creates my top-level categories, which works fine. The problem is that I will need sub-categories that are more specific, e.g. Men's Clothing (top level) > Jeans.
The CSV has several fields that contain info relating to each product, e.g. description: "stone wash bootcut jeans". Ideally I would like to check these fields for keywords and add each product to the correct categories. Is it possible to set up categories this way, or is there an alternative solution?
I am a django newbie so any help is appreciated.
models.py
from django.db import models
from mptt.models import MPTTModel, TreeForeignKey
class Category(MPTTModel):
    """Product category stored as a node in an MPTT tree."""

    name = models.CharField(max_length=50, unique=True)
    # Self-referential link to the parent node; nullable so a category
    # can exist without a parent.
    parent = TreeForeignKey(
        'self',
        null=True,
        blank=True,
        related_name='children',
        db_index=True,
        on_delete=models.CASCADE,
    )
    slug = models.SlugField()

    class MPTTMeta:
        order_insertion_by = ['name']

    class Meta:
        # One uniqueness constraint over the (parent, slug) pair, written in
        # the canonical tuple-of-tuples form.
        unique_together = (('parent', 'slug'),)
        verbose_name_plural = 'categories'

    def get_slug_list(self):
        """Return the cumulative slug paths leading to this category.

        The slugs of ``get_ancestors(include_self=True)`` are joined with
        ``'/'`` one level at a time, so slugs ``a``, ``b``, ``c`` produce
        ``['a', 'a/b', 'a/b/c']``.

        Returns:
            A list of path strings, or an empty list when the ancestors
            cannot be obtained.
        """
        try:
            ancestors = self.get_ancestors(include_self=True)
        except Exception:
            # Best-effort: keep returning an empty list on failure, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare ``except``
            # would.
            return []
        slugs = [node.slug for node in ancestors]
        return ['/'.join(slugs[:depth]) for depth in range(1, len(slugs) + 1)]

    def __str__(self):
        return self.name
class Brands(models.Model):
    """A product brand, identified by its name."""

    brand_name = models.CharField(
        max_length=500,
        default='',
    )

    def __str__(self):
        """Use the brand name as the human-readable representation."""
        return self.brand_name
class Product(models.Model):
    """A product with its descriptive text, prices, brand and category."""

    aw_deep_link = models.CharField(max_length=500, default='')
    description = models.CharField(max_length=500, default='')
    product_name = models.CharField(max_length=500, default='')
    aw_image_url = models.CharField(max_length=500, default='')
    search_price = models.DecimalField(max_digits=6, decimal_places=2, null=True)
    merchant_name = models.CharField(max_length=500, default='')
    display_price = models.CharField(max_length=500, default='')
    # Brands is a plain model, not an MPTT tree, so a regular ForeignKey is
    # used here; TreeForeignKey is only meant to point at MPTT models.
    brand_name = models.ForeignKey('Brands', on_delete=models.CASCADE)
    colour = models.CharField(max_length=500, default='')
    rrp_price = models.DecimalField(max_digits=6, decimal_places=2, null=True)
    # Optional link into the category tree.
    category = TreeForeignKey(
        'Category',
        null=True,
        blank=True,
        on_delete=models.CASCADE,
    )
    slug = models.SlugField(default='')

    def __str__(self):
        return self.product_name
importCSV.py
import re
from products.models import Category, Brands
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Management command that imports products from a CSV file.

    The first row of the file is treated as a header and skipped. Each
    following row must provide at least eleven columns, read by position:
    deep link, description, product name, image URL, search price, merchant
    name, display price, brand name, colour, RRP price, category name.
    """

    help = "Load some sample data into the db"

    def add_arguments(self, parser):
        """Register the ``--file`` option naming the CSV file to load."""
        parser.add_argument('--file', dest='file', help='File to load')

    def handle(self, **options):
        """Create one ``Product`` per data row of the file given by ``--file``.

        Categories and brands are looked up by name and created when they
        do not exist yet. Nothing happens when ``--file`` is not supplied.
        """
        # Local imports, following the function-scope import style this
        # command already used for Product.
        import csv

        from products.models import Product

        if not options['file']:
            return

        self.stdout.write("Importing " + options['file'])
        imported = 0
        # newline='' lets the csv module handle line endings itself.
        with open(options['file'], newline='') as f:
            # csv.reader copes with quoted fields that contain commas and
            # strips the line terminator, which a plain split(',') left
            # attached to the last column.
            reader = csv.reader(f)
            next(reader, None)  # skip the header; tolerate an empty file
            for fields in reader:
                if not fields:
                    # csv.reader yields [] for blank lines.
                    continue
                category, _ = Category.objects.get_or_create(name=fields[10])
                brand, _ = Brands.objects.get_or_create(brand_name=fields[7])
                Product.objects.create(
                    aw_deep_link=fields[0],
                    description=fields[1],
                    product_name=fields[2],
                    aw_image_url=fields[3],
                    # The price columns are nullable decimals; an empty
                    # string cannot be converted, so store it as NULL.
                    search_price=fields[4] or None,
                    merchant_name=fields[5],
                    display_price=fields[6],
                    brand_name=brand,
                    colour=fields[8],
                    rrp_price=fields[9] or None,
                    category=category,
                )
                imported += 1
        self.stdout.write("Added {0} products".format(imported))
So you have
From this setup I would first try to generalize the "search term" for each subcategory, maybe with a regex, depending on the complexity of the conditions you need to identify a subcategory. Most probably a list of synonyms is already sufficient. Add such a field to your Category model (here a regex solution):
class Category(MPTTModel):
    # Pattern searched for in the product text to detect this category.
    # Only needed for subcategories; top-level categories come from the CSV
    # and can leave it blank.
    regex = models.CharField(max_length=100, blank=True)
    ...
For your example, where `trainers` and `runners` would be equivalent (to my knowledge of English these are plural words here, so not equivalent to `trainer` or `runner` appearing anywhere), this would be (as a regex) r'trainers|runners'.
This is the part you need to define manually - I don't envy you for the tedious work involved ;)
Afterwards, your import loop would need some changes around here:
def handle(self, **options):
    from products.models import Product, Category
    # Evaluate the queryset once so the import loop does not hit the
    # database again for every row. Categories with a blank regex are left
    # out: an empty pattern matches every string, so they must never take
    # part in keyword matching.
    all_categories = list(Category.objects.exclude(regex=''))
and here
data = ...
# Find the first leaf category whose pattern occurs in one of the text
# fields; 'description' is scanned before 'product_name'.
subcat = None
for textfield in ('description', 'product_name'):
    for cat in all_categories:
        # A blank regex would match every string, and only nodes without
        # children qualify as subcategories.
        if not cat.regex or not cat.is_leaf_node():
            continue
        # Ignore case so "Jeans" and "jeans" are treated alike.
        if re.search(cat.regex, data[textfield], re.IGNORECASE):
            subcat = cat
            break
    if subcat is not None:
        break
# subcat is now the first matching subcategory, or None; in the latter
# case the category taken from the CSV is kept.
if subcat is not None:
    data['category'] = subcat
product = Product(**data)
Complete
import re
from products.models import Category, Brands
from django.core.management.base import BaseCommand
class Command(BaseCommand):
    """Management command that imports products from a CSV file.

    The first row of the file is treated as a header and skipped. Each
    following row must provide at least eleven columns, read by position:
    deep link, description, product name, image URL, search price, merchant
    name, display price, brand name, colour, RRP price, category name.

    A product is filed under the category named in its row unless the
    ``regex`` of a leaf category matches its description or product name,
    in which case that leaf category is used instead.
    """

    help = "Load some sample data into the db"

    def add_arguments(self, parser):
        """Register the ``--file`` option naming the CSV file to load."""
        parser.add_argument('--file', dest='file', help='File to load')

    @staticmethod
    def _find_subcategory(matchers, texts):
        """Return the category of the first pattern found in *texts*, or None.

        *matchers* is a sequence of ``(compiled_pattern, category)`` pairs.
        Texts are tried in order, and for each text the matchers are tried
        in order.
        """
        for text in texts:
            for pattern, category in matchers:
                if pattern.search(text):
                    return category
        return None

    def handle(self, **options):
        """Create one ``Product`` per data row of the file given by ``--file``.

        Categories and brands are looked up by name and created when they
        do not exist yet. Nothing happens when ``--file`` is not supplied.
        """
        # Local imports, following the function-scope import style this
        # command already used for Product.
        import csv

        from products.models import Product

        if not options['file']:
            return

        # Compile every usable pattern once, before the import loop.
        # Categories with a blank regex are excluded because an empty
        # pattern matches every string; only nodes without children are
        # considered. Matching ignores case.
        matchers = [
            (re.compile(cat.regex, re.IGNORECASE), cat)
            for cat in Category.objects.exclude(regex='')
            if cat.is_leaf_node()
        ]

        self.stdout.write("Importing " + options['file'])
        imported = 0
        # newline='' lets the csv module handle line endings itself.
        with open(options['file'], newline='') as f:
            # csv.reader copes with quoted fields that contain commas and
            # strips the line terminator, which a plain split(',') left
            # attached to the last column.
            reader = csv.reader(f)
            next(reader, None)  # skip the header; tolerate an empty file
            for fields in reader:
                if not fields:
                    # csv.reader yields [] for blank lines.
                    continue
                category, _ = Category.objects.get_or_create(name=fields[10])
                brand, _ = Brands.objects.get_or_create(brand_name=fields[7])
                data = {
                    'aw_deep_link': fields[0],
                    'description': fields[1],
                    'product_name': fields[2],
                    'aw_image_url': fields[3],
                    # The price columns are nullable decimals; an empty
                    # string cannot be converted, so store it as NULL.
                    'search_price': fields[4] or None,
                    'merchant_name': fields[5],
                    'display_price': fields[6],
                    'brand_name': brand,
                    'colour': fields[8],
                    'rrp_price': fields[9] or None,
                    'category': category,
                }
                # The description is scanned before the product name.
                subcat = self._find_subcategory(
                    matchers,
                    (data['description'], data['product_name']),
                )
                if subcat is not None:
                    data['category'] = subcat
                Product.objects.create(**data)
                imported += 1
        self.stdout.write("Added {0} products".format(imported))