Search code examples

How to delete a slide that contains a certain text from all the pptx files in multiple directories?

I am trying to delete every slide that contains a specific text ('apple') from all the pptx files in multiple directories. The code below is able to accomplish that.

import os
import glob
import pptx

def _slide_contains_text(slide, name):
    """Return True if any text run on *slide* contains the substring *name*."""
    for shape in slide.shapes:
        # Only shapes with a text frame expose paragraphs/runs.
        if not shape.has_text_frame:
            continue
        for paragraph in shape.text_frame.paragraphs:
            for run in paragraph.runs:
                if name in run.text:
                    return True
    return False


def delete_slide_with_name(presentation, name):
    """Build a new presentation from the slides that do not mention *name*.

    A slide is dropped when at least one of its text runs contains *name*.
    The check is a case-sensitive substring test made run by run, so text
    that is split across several runs is not detected.

    Args:
        presentation: The source presentation whose slides are scanned.
        name: The text that marks a slide for exclusion.

    Returns:
        A newly created presentation holding the retained slides.
    """
    new_presentation = pptx.Presentation()
    for slide in presentation.slides:
        if _slide_contains_text(slide, name):
            continue
        new_slide = new_presentation.slides.add_slide(slide.slide_layout)
        # NOTE(review): the shape elements are inserted into the new slide's
        # tree as they are, not copied. The accompanying question reports
        # that files produced this way need to be repaired when opened.
        for shp in slide.shapes:
            el = shp.element
            new_slide.shapes._spTree.insert_element_before(el, 'p:extLst')
    return new_presentation

# Walk the folder tree and rewrite every .pptx file without the slides that
# mention "apple".
path = "C:/Study/Thesis/main_project_folder/Test_Sensitive_Data_Script"
for root, dirs, files in os.walk(path):
    for file in files:
        if file.endswith(".pptx"):
            file_path = os.path.join(root, file)
            presentation = pptx.Presentation(file_path)
            new_presentation = delete_slide_with_name(presentation, "apple")
            # Without a save the filtered presentation is discarded and the
            # file on disk never changes. This overwrites the original file.
            new_presentation.save(file_path)

But the problem is that, although the code is able to delete the slide, it does not produce a readable file. When I try to open the file, a prompt pops up saying that the file needs to be repaired. After the repair, the structure of the file's content has changed.


enter image description here


enter image description here

enter image description here

How can I keep all the other slides exactly as they were, without the deleted slide?


  • Is there a reason you don't just delete the slides rather than adding the slides you want to a new presentation?

    import os
    import glob
    import pptx
    def _slide_has_text(slide, name):
        """Return True if any text run on *slide* contains the substring *name*."""
        for shape in slide.shapes:
            # Only shapes with a text frame expose paragraphs/runs.
            if not shape.has_text_frame:
                continue
            for paragraph in shape.text_frame.paragraphs:
                for run in paragraph.runs:
                    if name in run.text:
                        return True
        return False


    def delete_slide_with_name(presentation, name):
        """Remove, in place, every slide that has a text run containing *name*.

        The slide's entry is taken out of the presentation's slide-id list;
        the presentation object itself is modified and nothing is returned.
        The check is a case-sensitive substring test made run by run.

        Args:
            presentation: The presentation to modify.
            name: The text that marks a slide for removal.
        """
        xml_slides = presentation.slides._sldIdLst
        slides = list(xml_slides)
        # Go from the last slide to the first so that removing an entry never
        # shifts the index of a slide that has not been examined yet.
        for i in range(len(slides), 0, -1):
            slide = presentation.slides[i - 1]
            # Remove at most once per slide: a second remove of the same
            # entry (several matching runs) would raise.
            if _slide_has_text(slide, name):
                xml_slides.remove(slides[i - 1])
    # Walk the folder tree and strip the slides that mention "apple" from
    # every .pptx file found.
    path = "C:\\Users\\lukeb\\Downloads\\test\\"
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".pptx"):
                file_path = os.path.join(root, file)
                presentation = pptx.Presentation(file_path)
                delete_slide_with_name(presentation, "apple")
                # The deletion only changes the in-memory presentation; save
                # it back (overwriting the original) so the change persists.
                presentation.save(file_path)