Search code examples
pythonsharepointoffice365

How do I download whole file system from a given Sharepoint site?


I have a SharePoint site named "ABC". Below is the structure in which its files are stored:

|-Folder1
|  |--file_1_1.docx
|  |--file_1_2.docx
|
|-Folder2
|  |--file_2_1.docx
|
|-file_x.docx

Now I want to download the files to my local machine. I have heard that I can do this using the "Office365-REST-Python-Client" library. I have gone through different code samples on the internet but could not find a specific solution. Below is the code that gives me information about all the folders, but not about the files.

# Credentials handed to ClientCredential() in connectAuth() (placeholder values).
client_id = "XXXXX"
client_secret = "YYYY"

# site_url is given to ClientContext(); relative_url is the folder path that
# connectAuth() resolves with get_folder_by_server_relative_path().
site_url = "https://tenantName.sharepoint.com/sites/ABC"
relative_url = "Documents"

def connectAuth():
    """Connect to the site and print the sub-folders of ``relative_url``.

    Builds a client context from the module-level ``client_id``,
    ``client_secret`` and ``site_url``, fetches the folder at
    ``relative_url`` together with its ``Files`` and ``Folders``, and prints
    the folder collection, the number of folders and each folder's name.
    Any exception is caught: the traceback is printed, followed by a short
    failure message.
    """
    credentials = ClientCredential(client_id, client_secret)
    ctx = ClientContext(site_url).with_credentials(credentials)

    try:
        root = ctx.web.get_folder_by_server_relative_path(relative_url)
        root.expand(["Files","Folders"]).get().execute_query()

        subfolders = root.folders
        print(subfolders)

        print("Number of folders:", len(subfolders))
        print("Folder names:")
        for entry in subfolders:
            # Each folder is (re)loaded individually before its name is read.
            ctx.load(entry)
            ctx.execute_query()
            print(entry.properties["Name"])
    except Exception:
        traceback.print_exc()
        print('Problem printing out library contents')

# Run the folder listing.
connectAuth()

Can you please help me download the files present in those folders as well as the files outside them? I mean I want file_1_1.docx, file_1_2.docx, file_2_1.docx and file_x.docx to be downloaded to my local machine.


Solution

  • So, I got the answer. It's not a very efficient way of doing it, but it gets the job done.

    def downlaod_file_from_folder(folder_url, site_url, client_id, client_secret, local_file_path):
        """Download every file that sits directly inside one SharePoint folder.

        Sub-folders of ``folder_url`` are not visited.

        Args:
            folder_url: Path of the SharePoint folder, as accepted by
                ``get_folder_by_server_relative_path``.
            site_url: URL of the SharePoint site.
            client_id: Client id used to build the ``ClientCredential``.
            client_secret: Client secret used to build the ``ClientCredential``.
            local_file_path: Local directory that receives the files. It is
                created when it does not exist yet.

        Returns:
            ``None`` when all files were downloaded, otherwise
            ``{"Error": <message>}`` for the first exception raised.
        """
        import os  # function-scope import: the snippet has no import section

        client_credentials = ClientCredential(client_id, client_secret)
        ctx = ClientContext(site_url).with_credentials(client_credentials)
        try:
            # open(..., "wb") does not create missing directories, so make
            # sure the destination exists before the first file is written.
            os.makedirs(local_file_path, exist_ok=True)

            folderName = ctx.web.get_folder_by_server_relative_path(folder_url)
            ctx.load(folderName)
            ctx.execute_query()
            files = folderName.files
            ctx.load(files)
            ctx.execute_query()
            for file_item in files:
                ctx.load(file_item)
                ctx.execute_query()
                fileName = file_item.properties["Name"]
                file_url = file_item.properties["ServerRelativeUrl"]
                # Build the path with the platform separator instead of "//".
                final_file_path = os.path.join(local_file_path, fileName)
                with open(final_file_path, "wb") as local_file:
                    file = ctx.web.get_file_by_server_relative_url(file_url)
                    file.download(local_file)
                    # The download is only performed once the query executes.
                    ctx.execute_query()
                print(f"File {fileName} downloaded successfully")
        except Exception as e:
            return {"Error": str(e)}
    
            
    def downloadFiles(relative_url, site_url, client_id, client_secret, local_file_path):
        """Recursively download all files below a SharePoint folder.

        The files of ``relative_url`` are fetched through
        ``downlaod_file_from_folder``; every sub-folder except ``"Forms"`` is
        then processed by a recursive call. All files are written into the
        same ``local_file_path`` directory, so files that share a name in
        different SharePoint folders overwrite each other locally.

        Args:
            relative_url: Path of the folder to start from, as accepted by
                ``get_folder_by_server_relative_path``.
            site_url: URL of the SharePoint site.
            client_id: Client id used to build the ``ClientCredential``.
            client_secret: Client secret used to build the ``ClientCredential``.
            local_file_path: Local directory that receives the files.

        Returns:
            ``{"Success": "Files Downloaded Successfully"}`` when nothing
            failed, otherwise ``{"error": <message>}``. Failures reported by
            the per-folder download or by nested folders are joined with
            ``"; "`` instead of being discarded.
        """
        client_credentials = ClientCredential(client_id, client_secret)
        cctx = ClientContext(site_url).with_credentials(client_credentials)
        failures = []
        try:
            libraryRoot = cctx.web.get_folder_by_server_relative_path(relative_url)
            libraryRoot.expand(["Files","Folders"]).get().execute_query()
            folders = libraryRoot.folders
            files = libraryRoot.files

            if len(files) > 0:
                outcome = downlaod_file_from_folder(relative_url, site_url, client_id, client_secret, local_file_path)
                # The helper returns None on success and {"Error": ...} on failure.
                if outcome and "Error" in outcome:
                    failures.append(outcome["Error"])

            for folder_item in folders:
                cctx.load(folder_item)
                cctx.execute_query()
                folder_name = str(folder_item.properties["Name"])
                if folder_name == "Forms":
                    continue
                folder_url = relative_url + "/" + folder_name
                outcome = downloadFiles(folder_url, site_url, client_id, client_secret, local_file_path)
                # Keep going with the remaining folders, but remember the failure.
                if "error" in outcome:
                    failures.append(outcome["error"])

            if failures:
                return {"error": "; ".join(failures)}
            return{"Success": "Files Downloaded Successfully"}
        except Exception as e:
            return {"error":str(e)}