Search code examples
azure · sharepoint · microsoft-graph-api · sharepoint-online · azure-ad-graph-api

How to get the files of a specific folder path in SharePoint using the /lists Graph API


I was able to get the folders of a specific folder path using /drives, but /lists returns files as well, not just folders, unlike /drives. I have already made a Python script to retrieve files from Documents in SharePoint, but now I need to get the files of a specific folder rather than the whole Documents library.

Here is the Graph API URL to retrieve folders from a specific path:

https://graph.microsoft.com/v1.0/sites/siteId/drives/driveId/root:/General/index/images/animals:/children?$top=15

Here is my Python script to get files and folders using /lists:

async def fetch_file_details(session, url, headers):
    """Fetch one Graph API URL and return the decoded JSON body.

    NOTE(review): the response status is never checked here, so a non-200
    Graph error payload is decoded and returned just like a success —
    confirm callers can tell the two apart.
    """
    async with session.get(url, headers=headers) as response:
        print(f"headers {response.headers} ")
        payload = await response.json()
        return payload




async def get_all_images_in_library(accessToken, siteId, libraryId, batch_size=15):
    """Async generator: page through a SharePoint list via Microsoft Graph
    and yield one batch of image-item detail payloads per result page.

    Parameters:
        accessToken: OAuth bearer token for Microsoft Graph.
        siteId: SharePoint site identifier.
        libraryId: list (document library) identifier.
        batch_size: page size requested via the ``top`` query parameter.

    Yields:
        list: JSON detail payloads (``?expand=fields``) for the items on the
        current page whose webUrl ends with a known image extension.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items?top={batch_size}"

    headers = {
        'Authorization': f'Bearer {accessToken}',
        'Accept': 'application/json',
        # Asks Graph to attempt queries over non-indexed columns anyway.
        'Prefer': 'HonorNonIndexedQueriesWarningMayFailRandomly'
    }

    async with aiohttp.ClientSession() as session:
        # `url` is replaced by the @odata.nextLink after each page; the
        # loop ends when Graph stops returning one.
        while url:
            async with session.get(url, headers=headers) as response:
                if response.status != 200:
                    # Log the throttling diagnostics Graph sends back, then
                    # give up. NOTE(review): Retry-After is printed but not
                    # honoured — a retry/backoff loop may be intended here.
                    print(f"Failed to fetch: {response.status}")

                    retry_after = response.headers.get('Retry-After')

                    throttle_limit_percentage = response.headers.get('x-ms-throttle-limit-percentage')

                    throttle_scope = response.headers.get('x-ms-throttle-scope')

                    throttle_reason = response.headers.get('x-ms-throttle-reason')

                    print(f"headers {response.headers} ")

                    if retry_after:

                        print(f"Retry-After: {retry_after} seconds")

                    if throttle_limit_percentage:

                        print(f"Throttle Limit Percentage: {throttle_limit_percentage}%")

                    if throttle_scope:

                        print(f"Throttle Scope: {throttle_scope}")

                    if throttle_reason:

                        print(f"Throttle Reason: {throttle_reason}")

                    break

                data = await response.json()

            items = data.get('value', [])

            # An empty page ends the scan.
            if not items:

                break

            tasks = []

            for item in items:

                webUrl = item.get('webUrl', '')

                # Only fetch expanded details for items that look like images.
                if webUrl.lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):

                    fileDetailsUrl = f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items/{item['id']}?expand=fields"

                    task = fetch_file_details(session, fileDetailsUrl, headers)

                    tasks.append(task)

            if tasks:

                # Fetch all detail payloads for this page concurrently.
                batch_results = await asyncio.gather(*tasks)

                yield batch_results

            # Small pause between pages to soften throttling.
            await asyncio.sleep(0.1)

            url = data.get('@odata.nextLink')

My question is: how can I get the files and folders from a specific path using /lists?


Solution

  • For example, I have a folder structure like the one below, and I uploaded a few images into the animals folder:

    enter image description here

    To get the files of a specific folder path in SharePoint using /lists, modify the code like below:

    import aiohttp
    import asyncio
    
    # Fetch file details helper function
    async def fetch_file_details(session, url, headers):
        """Request one list item's detail view and return it as parsed JSON.

        Returns None (after logging the URL and status) when the Graph call
        does not succeed.
        """
        async with session.get(url, headers=headers) as response:
            if response.status != 200:
                print(f"Failed to fetch details for URL: {url}, Status: {response.status}")
                return None
            return await response.json()
    
    # Function to get all items in the document library (no folder filter)
    async def get_all_items_in_library(accessToken, siteId, libraryId, batch_size=15):
        """Async generator yielding, per result page of the SharePoint list,
        one batch of detail payloads for the items that look like images.

        accessToken -- Graph bearer token
        siteId      -- SharePoint site id
        libraryId   -- list (document library) id
        batch_size  -- page size requested via the ``top`` query parameter
        """
        page_url = f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items?top={batch_size}"

        request_headers = {
            'Authorization': f'Bearer {accessToken}',
            'Accept': 'application/json',
            'Prefer': 'HonorNonIndexedQueriesWarningMayFailRandomly'
        }

        # Throttling headers Graph may send back, paired with the message
        # template each one is reported through.
        throttle_diagnostics = (
            ('Retry-After', 'Retry-After: {} seconds'),
            ('x-ms-throttle-limit-percentage', 'Throttle Limit Percentage: {}%'),
            ('x-ms-throttle-scope', 'Throttle Scope: {}'),
            ('x-ms-throttle-reason', 'Throttle Reason: {}'),
        )

        async with aiohttp.ClientSession() as session:
            # page_url becomes the @odata.nextLink after every page; the loop
            # stops when Graph omits it.
            while page_url:
                async with session.get(page_url, headers=request_headers) as response:
                    if response.status != 200:
                        print(f"Failed to fetch: {response.status}")
                        print(f"headers {response.headers}")
                        for header_name, template in throttle_diagnostics:
                            value = response.headers.get(header_name)
                            if value:
                                print(template.format(value))
                        break

                    page = await response.json()
                    records = page.get('value', [])

                    if not records:
                        break

                    # Queue a detail request for every record whose webUrl
                    # ends with a known image extension.
                    detail_requests = [
                        fetch_file_details(
                            session,
                            f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items/{record['id']}?expand=fields",
                            request_headers,
                        )
                        for record in records
                        if record.get('webUrl', '').lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))
                    ]

                    # Process the batch of file details
                    if detail_requests:
                        yield await asyncio.gather(*detail_requests)

                # Move to the next page if there is one
                page_url = page.get('@odata.nextLink')

                # Throttling, wait a bit before the next request
                await asyncio.sleep(0.1)
    
    # Function to format and display the file details
    def format_and_display_file_details(files, folder_prefix="https://YourDomain.sharepoint.com/sites/RukSite/Shared%20Documents/General/index/images/animals/"):
        """Print a human-readable summary of each file whose webUrl lives
        under ``folder_prefix``.

        Parameters:
            files: iterable of Graph listItem JSON payloads; entries may be
                None (e.g. failed detail fetches) and are skipped.
            folder_prefix: folder URL prefix used to filter results. Defaults
                to the sample 'animals' folder — pass your own folder's URL
                prefix to reuse this function for any folder.
        """
        for file_info in files:  # renamed from `file` to avoid shadowing the builtin
            # Skip failed fetches (None) ...
            if not file_info:
                continue
            # ... and anything outside the target folder.
            web_url = file_info.get('webUrl', 'No URL')
            if not web_url.startswith(folder_prefix):
                continue

            fields = file_info.get('fields', {})
            file_name = fields.get('FileLeafRef', 'Unknown')
            file_size = fields.get('FileSizeDisplay', 'Unknown size')
            created_by = file_info.get('createdBy', {}).get('user', {}).get('displayName', 'Unknown')
            created_date = file_info.get('createdDateTime', 'Unknown')
            modified_by = file_info.get('lastModifiedBy', {}).get('user', {}).get('displayName', 'Unknown')
            modified_date = file_info.get('lastModifiedDateTime', 'Unknown')

            print(f"\nFile: {file_name}")
            print("-" * 25)
            print(f"- **Web URL**: {web_url}")
            print(f"- **File Size**: {file_size} bytes")
            print(f"- **Created By**: {created_by}")
            print(f"- **Created Date**: {created_date}")
            print(f"- **Last Modified By**: {modified_by}")
            print(f"- **Last Modified Date**: {modified_date}")
            print("-" * 25)
    
    # Example usage
    async def main():
        """Drive the sample: page through the library and print the details
        of the images found."""
        graph_args = {
            'accessToken': 'YourAccessToken',  # Your access token here
            'siteId': 'SiteID',                # Your site ID
            'libraryId': 'ListID',             # Your document library list ID
        }

        # Fetch and process images in the library (no folder filter)
        async for page_of_files in get_all_items_in_library(**graph_args):
            format_and_display_file_details(page_of_files)

    # Run the script
    asyncio.run(main())
    

    I successfully got the files in the animals folder:

    enter image description here