№1 source table: sales (from unstructured file .csv)
id | auto | gasoline_consumption | country |
---|---|---|---|
1 | Lada Vesta, grey | 7.3 | Russia |
2 | BMW F80, red | 8.3 | Germany |
3 | Hyundai Elantra, red | 5 | South Korea |
... | ... | ... | ... |
CREATE TABLE [sales](
[id] [nvarchar](50) NOT NULL,
[auto] [nvarchar](50) NOT NULL,
[gasoline_consumption] [nvarchar](50) NULL,
[country] [nvarchar] (50)
) ON [PRIMARY];
INSERT INTO sales values(1,'Lada Vesta, grey', 7.3, 'Russia');
INSERT INTO sales values(2,'BMW F80, red', 8.3, 'Germany');
INSERT INTO sales values(3,'Hyundai Elantra, red', 5, 'South Korea');
№2 source table: Brand
id | name |
---|---|
1 | Lada |
2 | Hyundai |
3 | BMW |
... | ... |
CREATE TABLE Brand (
id INT PRIMARY KEY IDENTITY(1,1) NOT NULL,
name VARCHAR(50),
);
INSERT INTO Brand values(1, 'Lada');
INSERT INTO Brand values(2,'Hyundai');
INSERT INTO Brand values(3,'BMW');
№3 source table: Color
id | name |
---|---|
1 | grey |
2 | red |
... | ... |
CREATE TABLE Color (
id INT PRIMARY KEY IDENTITY (1,1) NOT NULL,
name VARCHAR(15)
)
INSERT INTO Color values(1, 'grey');
INSERT INTO Color values(2,'red');
№4 source table: Country
id | name |
---|---|
1 | Russia |
2 | Germany |
3 | South Korea |
... | ... |
CREATE TABLE Country (
id INT PRIMARY KEY IDENTITY (1,1) NOT NULL,
name VARCHAR(50)
)
INSERT INTO Country values(1, 'Russia');
INSERT INTO Country values(2,'Germany');
INSERT INTO Country values(2,'South Korea');
№5 Example of the final table: Cars
id | id_brand | model_name | gasoline_consumption | id_color | id_country |
---|---|---|---|---|---|
1 | 3 | F80 | 8,3 | 2 | 2 |
2 | 1 | Vesta | 7,3 | 1 | 1 |
3 | 2 | Elantra | 5 | 2 | 3 |
... | ... | ... | ... | ... | ... |
CREATE TABLE Cars (
id INT PRIMARY KEY IDENTITY(1,1) NOT NULL,
id_brand INT NOT NULL,
model_name varchar (30),
gasoline_consumption DECIMAL(3,1),
id_color INT NOT NULL,
FOREIGN KEY (id_brand) REFERENCES Brand (id),
FOREIGN KEY (id_color) REFERENCES Color (id),
FOREIGN KEY (id_country) REFERENCES Country (id)
)
I was able to transfer colors, car brand and country from the original table to other tables. But now I don't understand how to enter data into the "Cars" table if need to take into account the id relationship between the car brand, country, color and model name.
Note: These data are given as an example, in fact there are about 1000 of them there
You can join back to your reference tables to get back the id, e.g.
INSERT dbo.Cars (id_brand, model_name, gasoline_consumption, id_color, id_country)
SELECT id_brand = b.id,
model_name = SUBSTRING(s.auto, CHARINDEX(' ', s.auto), CHARINDEX(', ', s.auto) -CHARINDEX(' ', s.auto)),
gasoline_consumption = TRY_CONVERT(DECIMAL(3, 1), s.gasoline_consumption),
id_color = cr.id,
id_country = c.id
FROM dbo.Sales AS s
LEFT JOIN dbo.Country AS c
ON c.name = s.country
LEFT JOIN dbo.Brand AS b
ON b.name = LEFT(s.auto,CHARINDEX(' ', s.auto))
LEFT JOIN dbo.Color AS cr
ON cr.name = TRIM(RIGHT(s.auto, CHARINDEX(' ', REVERSE(s.auto))))
N.B. The above is inaccurate and won't work many values in `auto. If you have a colour such as "Sky Blue" it will fail, if you have a brand like "Land Rover", it will fail. The parsing logic shown is simple and for demo purposes, this should be replaced with your actual parsing logic.
You should probably encapsulate your parsing logic though to avoid repeating yourself and reduce code maintenance, something like:
CREATE VIEW dbo.Sales_Parsed
AS
SELECT model_name = SUBSTRING(s.auto, CHARINDEX(' ', s.auto), CHARINDEX(', ', s.auto) -CHARINDEX(' ', s.auto)),
gasoline_consumption = TRY_CONVERT(DECIMAL(3, 1), s.gasoline_consumption),
color = TRIM(RIGHT(s.auto, CHARINDEX(' ', REVERSE(s.auto)))),
brand = LEFT(s.auto,CHARINDEX(' ', s.auto))
FROM dbo.Sales AS s;
You can then reference this view when populating your reference tables, e.g.
INSERT dbo.Country (name)
SELECT DISTINCT sp.Country
FROM dbo.Sales_Parsed AS sp
EXCEPT
SELECT c.name
FROM dbo.Country AS c;
INSERT dbo.Color (name)
SELECT DISTINCT sp.color
FROM dbo.Sales_Parsed AS sp
EXCEPT
SELECT c.name
FROM dbo.color AS c;
INSERT dbo.Brand (name)
SELECT DISTINCT sp.brand
FROM dbo.Sales_Parsed AS sp
EXCEPT
SELECT c.name
FROM dbo.Brand AS c;
And also where you populate your cars table:
INSERT dbo.Cars (id_brand, model_name, gasoline_consumption, id_color, id_country)
SELECT id_brand = b.id,
s.model_name,
s.gasoline_consumption,
id_color = cr.id,
id_country = c.id
FROM dbo.Sales_Parsed AS s
INNER JOIN dbo.Country AS c
ON c.name = s.country
INNER JOIN dbo.Brand AS b
ON b.name = s.brand
INNER JOIN dbo.Color AS cr
ON cr.name = s.color;