I have 2 tables
CREATE TABLE `CORPUS_FILE_SENTENCE` (
`corpus_file_sentence_id` int(11) NOT NULL AUTO_INCREMENT,
`corpus_file_sentence_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
`date_sentence_saved` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`sentence_number` int(11) NOT NULL,
`corpus_file_id` int(11) NOT NULL,
`comment` varchar(200) DEFAULT NULL,
PRIMARY KEY (`corpus_file_sentence_id`),
KEY `fk_corpus_file_sentence-corpus_file_id` (`corpus_file_id`),
CONSTRAINT `fk_corpus_file_sentence-corpus_file_id` FOREIGN KEY (`corpus_file_id`) REFERENCES `CORPUS_FILE` (`corpus_file_id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1;
and
CREATE TABLE `CORPUS_FILE_PREPROCESSED_SENTENCE` (
`corpus_file_preprocessed_sentence_id` int(11) NOT NULL AUTO_INCREMENT,
`coreference_resolved_sentence_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`stopword_removed_sentence_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`lemmatized_sentence_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`lowercased_sentence_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`preprocessed_sentence_text` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`date_preprocessed_sentence_saved` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`corpus_file_sentence_id` int(11) NOT NULL,
`comment` varchar(200) DEFAULT NULL,
PRIMARY KEY (`corpus_file_preprocessed_sentence_id`),
KEY `fk_corpus_file_preprocessed_sentence-corpus_file_sentence_id` (`corpus_file_sentence_id`),
CONSTRAINT `fk_corpus_file_preprocessed_sentence-corpus_file_sentence_id` FOREIGN KEY (`corpus_file_sentence_id`) REFERENCES `CORPUS_FILE_SENTENCE` (`corpus_file_sentence_id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1;
I need to show the result of Coreference Resolution along with the original sentence and I was trying
select
CFS.corpus_file_sentence_id,
CFS.sentence_number,
CFS.corpus_file_sentence_text,
CFPS.coreference_resolved_sentence_text
from CORPUS_FILE_SENTENCE CFS
inner join CORPUS_FILE_PREPROCESSED_SENTENCE CFPS
on CFS.corpus_file_sentence_id = CFPS.corpus_file_sentence_id
and CFS.corpus_file_id = 3;
This is resulting in duplicate rows of the form
How do I remove the redundant rows?
It is really strange that you get these results, but for these results you can remove the rows with nulls in the column coreference_resolved_sentence_text
with group by
and aggregation:
select
CFS.corpus_file_sentence_id, CFS.sentence_number,
CFS.corpus_file_sentence_text, max(CFPS.coreference_resolved_sentence_text)
from CORPUS_FILE_SENTENCE CFS inner join CORPUS_FILE_PREPROCESSED_SENTENCE CFPS
on CFS.corpus_file_sentence_id = CFPS.corpus_file_sentence_id and CFS.corpus_file_id = 3
group by
CFS.corpus_file_sentence_id,
CFS.sentence_number,
CFS.corpus_file_sentence_text