Search code examples
azure · sharepoint · microsoft-graph-api · sharepoint-online · azure-ad-graph-api

How to get the files of a specific folder path in SharePoint using the /lists Graph API


I was able to get the folders of a specific folder path using /drives, but /lists returns files as well, not just folders, unlike /drives. I have already made a Python script to retrieve files from Documents in SharePoint, but now I need to get the files of a specific folder rather than the whole Documents library.

Here is the Graph API URL to retrieve folders from a specific path:

https://graph.microsoft.com/v1.0/sites/siteId/drives/driveId/root:/General/index/images/animals:/children?$top=15

Here is my Python script to get files and folders using /lists:

async def fetch_file_details(session, url, headers):
    """Fetch one Graph API URL and return the decoded JSON body.

    NOTE(review): the response status is never checked here, so a non-200
    Graph error payload is decoded and returned just like a success —
    confirm callers can tell the two apart.
    """
    async with session.get(url, headers=headers) as response:
        print(f"headers {response.headers} ")
        payload = await response.json()
        return payload




async def get_all_images_in_library(accessToken, siteId, libraryId, batch_size=15):
    """Async generator: page through a SharePoint list via Microsoft Graph
    and yield one batch of image-item detail payloads per result page.

    Parameters:
        accessToken: OAuth bearer token for Microsoft Graph.
        siteId: SharePoint site identifier.
        libraryId: list (document library) identifier.
        batch_size: page size requested via the ``top`` query parameter.

    Yields:
        list: JSON detail payloads (``?expand=fields``) for the items on the
        current page whose webUrl ends with a known image extension.
    """
    url = f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items?top={batch_size}"

    headers = {
        'Authorization': f'Bearer {accessToken}',
        'Accept': 'application/json',
        # Asks Graph to attempt queries over non-indexed columns anyway.
        'Prefer': 'HonorNonIndexedQueriesWarningMayFailRandomly'
    }

    async with aiohttp.ClientSession() as session:
        # `url` is replaced by the @odata.nextLink after each page; the
        # loop ends when Graph stops returning one.
        while url:
            async with session.get(url, headers=headers) as response:
                if response.status != 200:
                    # Log the throttling diagnostics Graph sends back, then
                    # give up. NOTE(review): Retry-After is printed but not
                    # honoured — a retry/backoff loop may be intended here.
                    print(f"Failed to fetch: {response.status}")

                    retry_after = response.headers.get('Retry-After')

                    throttle_limit_percentage = response.headers.get('x-ms-throttle-limit-percentage')

                    throttle_scope = response.headers.get('x-ms-throttle-scope')

                    throttle_reason = response.headers.get('x-ms-throttle-reason')

                    print(f"headers {response.headers} ")

                    if retry_after:

                        print(f"Retry-After: {retry_after} seconds")

                    if throttle_limit_percentage:

                        print(f"Throttle Limit Percentage: {throttle_limit_percentage}%")

                    if throttle_scope:

                        print(f"Throttle Scope: {throttle_scope}")

                    if throttle_reason:

                        print(f"Throttle Reason: {throttle_reason}")

                    break

                data = await response.json()

            items = data.get('value', [])

            # An empty page ends the scan.
            if not items:

                break

            tasks = []

            for item in items:

                webUrl = item.get('webUrl', '')

                # Only fetch expanded details for items that look like images.
                if webUrl.lower().endswith(('.jpg', '.jpeg', '.png', '.gif')):

                    fileDetailsUrl = f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items/{item['id']}?expand=fields"

                    task = fetch_file_details(session, fileDetailsUrl, headers)

                    tasks.append(task)

            if tasks:

                # Fetch all detail payloads for this page concurrently.
                batch_results = await asyncio.gather(*tasks)

                yield batch_results

            # Small pause between pages to soften throttling.
            await asyncio.sleep(0.1)

            url = data.get('@odata.nextLink')

My question is: how can I get the files and folders from a specific path using /lists?


Solution

  • For example, I have a folder structure like the one below, and I uploaded a few images into the animals folder:

    enter image description here

    To get the files of a specific folder path in SharePoint using /lists, modify the code like below:

    import aiohttp
    import asyncio
    
    # Fetch file details helper function
    async def fetch_file_details(session, url, headers):
        """Request one list item's detail view and return it as parsed JSON.

        Returns None (after logging the URL and status) when the Graph call
        does not succeed.
        """
        async with session.get(url, headers=headers) as response:
            if response.status != 200:
                print(f"Failed to fetch details for URL: {url}, Status: {response.status}")
                return None
            return await response.json()
    
    # Function to get all items in the document library (no folder filter)
    async def get_all_items_in_library(accessToken, siteId, libraryId, batch_size=15):
        """Async generator yielding, per result page of the SharePoint list,
        one batch of detail payloads for the items that look like images.

        accessToken -- Graph bearer token
        siteId      -- SharePoint site id
        libraryId   -- list (document library) id
        batch_size  -- page size requested via the ``top`` query parameter
        """
        page_url = f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items?top={batch_size}"

        request_headers = {
            'Authorization': f'Bearer {accessToken}',
            'Accept': 'application/json',
            'Prefer': 'HonorNonIndexedQueriesWarningMayFailRandomly'
        }

        # Throttling headers Graph may send back, paired with the message
        # template each one is reported through.
        throttle_diagnostics = (
            ('Retry-After', 'Retry-After: {} seconds'),
            ('x-ms-throttle-limit-percentage', 'Throttle Limit Percentage: {}%'),
            ('x-ms-throttle-scope', 'Throttle Scope: {}'),
            ('x-ms-throttle-reason', 'Throttle Reason: {}'),
        )

        async with aiohttp.ClientSession() as session:
            # page_url becomes the @odata.nextLink after every page; the loop
            # stops when Graph omits it.
            while page_url:
                async with session.get(page_url, headers=request_headers) as response:
                    if response.status != 200:
                        print(f"Failed to fetch: {response.status}")
                        print(f"headers {response.headers}")
                        for header_name, template in throttle_diagnostics:
                            value = response.headers.get(header_name)
                            if value:
                                print(template.format(value))
                        break

                    page = await response.json()
                    records = page.get('value', [])

                    if not records:
                        break

                    # Queue a detail request for every record whose webUrl
                    # ends with a known image extension.
                    detail_requests = [
                        fetch_file_details(
                            session,
                            f"https://graph.microsoft.com/v1.0/sites/{siteId}/lists/{libraryId}/items/{record['id']}?expand=fields",
                            request_headers,
                        )
                        for record in records
                        if record.get('webUrl', '').lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))
                    ]

                    # Process the batch of file details
                    if detail_requests:
                        yield await asyncio.gather(*detail_requests)

                # Move to the next page if there is one
                page_url = page.get('@odata.nextLink')

                # Throttling, wait a bit before the next request
                await asyncio.sleep(0.1)
    
    # Function to format and display the file details
    def format_and_display_file_details(files, folder_prefix="https://YourDomain.sharepoint.com/sites/RukSite/Shared%20Documents/General/index/images/animals/"):
        """Print a human-readable summary of each file whose webUrl lives
        under ``folder_prefix``.

        Parameters:
            files: iterable of Graph listItem JSON payloads; entries may be
                None (e.g. failed detail fetches) and are skipped.
            folder_prefix: folder URL prefix used to filter results. Defaults
                to the sample 'animals' folder — pass your own folder's URL
                prefix to reuse this function for any folder.
        """
        for file_info in files:  # renamed from `file` to avoid shadowing the builtin
            # Skip failed fetches (None) ...
            if not file_info:
                continue
            # ... and anything outside the target folder.
            web_url = file_info.get('webUrl', 'No URL')
            if not web_url.startswith(folder_prefix):
                continue

            fields = file_info.get('fields', {})
            file_name = fields.get('FileLeafRef', 'Unknown')
            file_size = fields.get('FileSizeDisplay', 'Unknown size')
            created_by = file_info.get('createdBy', {}).get('user', {}).get('displayName', 'Unknown')
            created_date = file_info.get('createdDateTime', 'Unknown')
            modified_by = file_info.get('lastModifiedBy', {}).get('user', {}).get('displayName', 'Unknown')
            modified_date = file_info.get('lastModifiedDateTime', 'Unknown')

            print(f"\nFile: {file_name}")
            print("-" * 25)
            print(f"- **Web URL**: {web_url}")
            print(f"- **File Size**: {file_size} bytes")
            print(f"- **Created By**: {created_by}")
            print(f"- **Created Date**: {created_date}")
            print(f"- **Last Modified By**: {modified_by}")
            print(f"- **Last Modified Date**: {modified_date}")
            print("-" * 25)
    
    # Example usage
    async def main():
        """Drive the sample: page through the library and print the details
        of the images found."""
        graph_args = {
            'accessToken': 'YourAccessToken',  # Your access token here
            'siteId': 'SiteID',                # Your site ID
            'libraryId': 'ListID',             # Your document library list ID
        }

        # Fetch and process images in the library (no folder filter)
        async for page_of_files in get_all_items_in_library(**graph_args):
            format_and_display_file_details(page_of_files)

    # Run the script
    asyncio.run(main())
    

    I successfully got the files in the animals folder:

    enter image description here