Search code examples
assemblyx86doswatcom

How to do DMA transfers under DOS extender or DPMI environment?


How can a program use DMA transfers while running under a DOS extender + DPMI environment?

I mean how can we allocate and get physical address of the dma buffer allocated, so as to supply that physical address to the DMA controller, or PCI bus master device.

There are two possibilities:

The DOS Extender or DPMI server/host support virtual memory. eg Causeway.

The DOS Extender or DPMI server/host doesn't support virtual memory, but paging is enabled. eg DOS32a.

I'm using Open Watcom C compiler.

The runtime environment is :

FreeDOS + XMS(no EMS/EMM386) + DOS extender(DOS32a)

For DJGPP, the solution is here

But will that last-mentioned solution, i.e. via XMS, work with DOS32a too?

DOS32a docs say that before switching to protected mode, it allocates all of the available extended memory, and then our program can allocate that memory via DPMI function 501h.

NOTE: The dma buffer can be 1MB or so, so I can't use conventional memory for it.


Solution

  • For a clean DPMI solution you may want to explore the following DPMI functions (excerpts from Ralf Brown's Interrupt List):

    INT 31 P - DPMI 1.0+ - MAP DEVICE IN MEMORY BLOCK
            AX = 0508h
            ESI = memory block handle
            EBX = page-aligned offset within memory block of page(s) to be mapped
            ECX = number of pages to map
            EDX = page-aligned physical address of device
    Return: CF clear if successful
            CF set on error
                AX = error code (8001h,8003h,8023h,8025h) (see #03143)
    Notes:  only supported by 32-bit DPMI hosts, but may be used by 16-bit clients
            support of this function is optional; hosts are also allowed to support
              the function for some devices but not others
    
    INT 31 P - DPMI 1.0+ - MAP CONVENTIONAL MEMORY IN MEMORY BLOCK
            AX = 0509h
            ESI = memory block handle
            EBX = page-aligned offset within memory block of page(s) to map
            ECX = number of pages to map
            EDX = page-aligned linear address of conventional (below 1M) memory
    Return: CF clear if successful
            CF set on error
                AX = error code (8001h,8003h,8023h,8025h) (see #03143)
    Notes:  only supported by 32-bit DPMI hosts, but may be used by 16-bit clients
            support of this function is optional
    
    INT 31 P - DPMI 0.9+ - PHYSICAL ADDRESS MAPPING
            AX = 0800h
            BX:CX = physical address (should be above 1 MB)
            SI:DI = size in bytes
    Return: CF clear if successful
                BX:CX = linear address which maps the requested physical memory
            CF set on error
                AX = error code (DPMI 1.0+) (8003h,8021h) (see #03143)
    Notes:  implementations may refuse this call because it can circumvent protects
            the caller must build an appropriate selector for the memory
            do not use for memory mapped in the first megabyte
    

    If none of the above lets you map virtual addresses to physical addresses or obtain the physical addresses of allocated blocks (e.g. because the functions are unsupported), you'll need to look into the implementation details of your DPMI host (e.g. if it doesn't enable page translation, or page translation can be turned off, then all addresses are physical).

    EDIT: It looks like you should be able to allocate memory (more than and beyond 1MB) and get its physical and virtual addresses. First, allocate it using XMS/Himem.sys and lock it. That'll give you the physical address. Next, use the DPMI function 0x800 to get the corresponding virtual address.

    Here's how (disregard the 16-bit version (compiled with Borland/Turbo C/C++); it's used only to validate the XMS routines):

    // file: dma.c
    //
    // Compiling with Open Watcom C/C++ and DOS/32 DOS extender/DPMI host:
    //   wcl386.exe /q /we /wx /bcl=dos4g dma.c
    //   sb.exe /b /bndmados32.exe dma.exe
    // Before running dmados32.exe do "set DOS32A=/EXTMEM:4096"
    // to limit the amount of extended (XMS) memory allocated by DOS/32
    // at program start (by default it allocates everything).
    //
    // Compiling with 16-bit Borland/Turbo C/C++:
    //   tcc.exe dma.c
    
    #include <stdio.h>
    #include <string.h>
    #include <dos.h>
    #include <limits.h>
    
    // Compile-time guard: only the two toolchain/target combinations below are
    // accepted; anything else stops the build with an #error.
    #if defined(__WATCOMC__)
    // Watcom builds must target the 386 (the Watcom code paths in this file use
    // int386() and 32-bit registers).
    #if !defined(__386__)
    #error unsupported target, must be 32-bit (DPMI) DOS app
    #endif
    #elif defined(__TURBOC__)
    // Turbo C builds must use the small memory model (the Turbo C path of
    // RmFarCall() embeds 16-bit data offsets into generated code).
    #if !defined(__SMALL__)
    #error unsupported target, must be 16-bit DOS app with small memory model
    #endif
    #else
    #error unsupported compiler
    #endif
    
    // Short integer aliases used throughout this file.
    typedef unsigned uint;
    typedef unsigned long ulong;
    
    typedef signed char int8;
    typedef unsigned char uint8;
    
    typedef short int16;
    typedef unsigned short uint16;
    
    // Pick the 32-bit base type: int when unsigned int covers 32 bits
    // (32-bit targets), long otherwise (16-bit targets, where int is 16 bits).
    // The test has to use UINT_MAX: <limits.h> defines no UINT_MIN, so the
    // preprocessor would evaluate it as 0 and always fall through to long.
    #if UINT_MAX >= 0xFFFFFFFF
    typedef int int32;
    typedef unsigned uint32;
    #else
    typedef long int32;
    typedef unsigned long uint32;
    #endif
    
    #pragma pack(push, 1)
    
    // Byte-packed register image handed to RmInt()/RmFarCall(). On the Watcom
    // build its address is passed in EDI to INT 31h functions 0x300/0x301, so
    // the field order and sizes must not change.
    typedef struct tDpmiRmInt
    {
      // 32-bit general registers (resz0 is a reserved slot).
      uint32 edi;
      uint32 esi;
      uint32 ebp;
      uint32 resz0;
      uint32 ebx;
      uint32 edx;
      uint32 ecx;
      uint32 eax;
      // 16-bit flags, segment registers, and the cs:ip / ss:sp pairs.
      uint16 flags;
      uint16 es;
      uint16 ds;
      uint16 fs;
      uint16 gs;
      uint16 ip;
      uint16 cs;
      uint16 sp;
      uint16 ss;
    } tDpmiRmInt;
    
    #pragma pack(pop)
    
    // Runs software interrupt IntNumber with the register image in *pRegs.
    //
    // Watcom (32-bit) build: issues INT 0x31 with AX = 0x300, BL = IntNumber,
    // BH = 0, CX = 0 and EDI = address of *pRegs, and returns whatever
    // int386() returns.
    //
    // Turbo C (16-bit) build: loads the low 16 bits of the general registers
    // plus flags, ds and es from *pRegs into a REGPACK, calls intr(), then
    // zeroes *pRegs and stores the resulting 16-bit values back. Returns the
    // resulting AX.
    int RmInt(uint8 IntNumber, tDpmiRmInt* pRegs)
    {
    #if defined(__WATCOMC__)
      union REGS callRegs, resultRegs;
    
      memset(&resultRegs, 0, sizeof(resultRegs));
      memset(&callRegs, 0, sizeof(callRegs));
    
      callRegs.x.edi = (uint32)pRegs;
      callRegs.w.cx = 0;
      callRegs.w.bx = IntNumber;      // BL = interrupt number, BH = 0
      callRegs.w.ax = 0x300;
    
      return int386(0x31, &callRegs, &resultRegs);
    #elif defined(__TURBOC__)
      struct REGPACK pack;
    
      memset(&pack, 0, sizeof(pack));
    
      // Segment registers and flags, then the 16-bit general registers.
      // fs/gs do not exist in 16-bit code; ss:sp and cs:ip are left to intr().
      pack.r_es = pRegs->es;
      pack.r_ds = pRegs->ds;
      pack.r_flags = pRegs->flags;
      pack.r_bp = (uint16)pRegs->ebp;
      pack.r_di = (uint16)pRegs->edi;
      pack.r_si = (uint16)pRegs->esi;
      pack.r_dx = (uint16)pRegs->edx;
      pack.r_cx = (uint16)pRegs->ecx;
      pack.r_bx = (uint16)pRegs->ebx;
      pack.r_ax = (uint16)pRegs->eax;
    
      intr(IntNumber, &pack);
    
      // Hand the results back; everything not listed below ends up zero.
      memset(pRegs, 0, sizeof(*pRegs));
    
      pRegs->es = pack.r_es;
      pRegs->ds = pack.r_ds;
      pRegs->flags = pack.r_flags;
      pRegs->ebp = pack.r_bp;
      pRegs->edi = pack.r_di;
      pRegs->esi = pack.r_si;
      pRegs->edx = pack.r_dx;
      pRegs->ecx = pack.r_cx;
      pRegs->ebx = pack.r_bx;
      pRegs->eax = pack.r_ax;
    
      return pack.r_ax;
    #endif
    }
    
    // Far-calls the real-mode routine at pRegs->cs:pRegs->ip with the register
    // values taken from *pRegs, and stores the registers the routine returns
    // back into *pRegs.
    //
    // Watcom (32-bit) build: issues INT 0x31 with AX = 0x301, BH = 0, CX = 0 and
    // EDI = address of *pRegs; returns whatever int386() returns.
    //
    // Turbo C (16-bit) build: assembles a small machine-code thunk in a local
    // buffer and calls it through a far function pointer. Only the low 16 bits
    // of each register field are loaded and written back. Returns the low
    // 16 bits of pRegs->eax after the call.
    int RmFarCall(tDpmiRmInt* pRegs)
    {
    #if defined(__WATCOMC__)
      union REGS inregs, outregs;
    
      memset(&inregs, 0, sizeof(inregs));
      memset(&outregs, 0, sizeof(outregs));
    
      inregs.w.ax = 0x301;
      inregs.h.bh = 0;
      inregs.w.cx = 0;
      inregs.x.edi = (uint32)pRegs;
    
      return int386(0x31, &inregs, &outregs);
    #elif defined(__TURBOC__)
      // The thunk is built byte by byte in code[] and then executed in place.
      uint8 code[128];
      uint8* p = code;
      void far* codef = &code[0];
      void (far* f)(void) = (void(far*)(void))codef;
    
      // Prologue: save the caller's general registers, ds and es.
      *p++ = 0x60;                                                            // pusha
      *p++ = 0x1E;                                                            // push  ds
      *p++ = 0x06;                                                            // push  es
    
      // Load ds/es with the requested values (push immediate, then pop).
      *p++ = 0x68; *p++ = (uint8)pRegs->ds; *p++ = (uint8)(pRegs->ds >> 8);   // push #
      *p++ = 0x1F;                                                            // pop  ds
      *p++ = 0x68; *p++ = (uint8)pRegs->es; *p++ = (uint8)(pRegs->es >> 8);   // push #
      *p++ = 0x07;                                                            // pop  es
    
      // Load the 16-bit general registers as immediates (low byte first).
      *p++ = 0xb8; *p++ = (uint8)pRegs->eax; *p++ = (uint8)(pRegs->eax >> 8); // mov ax, #
      *p++ = 0xbb; *p++ = (uint8)pRegs->ebx; *p++ = (uint8)(pRegs->ebx >> 8); // mov bx, #
      *p++ = 0xb9; *p++ = (uint8)pRegs->ecx; *p++ = (uint8)(pRegs->ecx >> 8); // mov cx, #
      *p++ = 0xba; *p++ = (uint8)pRegs->edx; *p++ = (uint8)(pRegs->edx >> 8); // mov dx, #
      *p++ = 0xbe; *p++ = (uint8)pRegs->esi; *p++ = (uint8)(pRegs->esi >> 8); // mov si, #
      *p++ = 0xbf; *p++ = (uint8)pRegs->edi; *p++ = (uint8)(pRegs->edi >> 8); // mov di, #
      *p++ = 0xbd; *p++ = (uint8)pRegs->ebp; *p++ = (uint8)(pRegs->ebp >> 8); // mov bp, #
    
      // Far call to the target; the operand is encoded offset first, then segment.
      *p++ = 0x9A; *p++ = (uint8)pRegs->ip; *p++ = (uint8)(pRegs->ip >> 8);
                   *p++ = (uint8)pRegs->cs; *p++ = (uint8)(pRegs->cs >> 8);   // call far seg:offs
    
      // Capture the callee's results by pushing everything again. The ds saved
      // by the prologue then sits at [sp + 0x16] (es, ds and eight pusha words =
      // 0x14 bytes, followed by the prologue's es) and is reloaded so that the
      // absolute stores below address this program's data.
      *p++ = 0x60;                                                            // pusha
      *p++ = 0x1E;                                                            // push  ds
      *p++ = 0x06;                                                            // push  es
      *p++ = 0x89; *p++ = 0xE5;                                               // mov   bp, sp
      *p++ = 0x8E; *p++ = 0x5E; *p++ = 0x16;                                  // mov   ds, [bp + 0x16]
      *p++ = 0x89; *p++ = 0xEE;                                               // mov   si, bp
      *p++ = 0xFC;                                                            // cld
    
      // Walk the freshly pushed words from the top of the stack upwards and
      // store each one into the matching field of *pRegs (16-bit stores at the
      // field's near offset).
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->es; *p++ = (uint8)((uint16)&pRegs->es >> 8);  // mov [], ax (es)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->ds; *p++ = (uint8)((uint16)&pRegs->ds >> 8);  // mov [], ax (ds)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->edi; *p++ = (uint8)((uint16)&pRegs->edi >> 8);  // mov [], ax (di)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->esi; *p++ = (uint8)((uint16)&pRegs->esi >> 8);  // mov [], ax (si)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->ebp; *p++ = (uint8)((uint16)&pRegs->ebp >> 8);  // mov [], ax (bp)
      // The next word is the sp slot written by pusha: load it and discard it.
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->ebx; *p++ = (uint8)((uint16)&pRegs->ebx >> 8);  // mov [], ax (bx)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->edx; *p++ = (uint8)((uint16)&pRegs->edx >> 8);  // mov [], ax (dx)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->ecx; *p++ = (uint8)((uint16)&pRegs->ecx >> 8);  // mov [], ax (cx)
      *p++ = 0xAD;                                                            // lodsw
      *p++ = 0xA3; *p++ = (uint8)&pRegs->eax; *p++ = (uint8)((uint16)&pRegs->eax >> 8);  // mov [], ax (ax)
    
      // Drop the second set of pushes (10 words = 0x14 bytes).
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x14;                                  // add   sp, 0x14
    
      // Epilogue: restore the caller's registers and return to the C code.
      *p++ = 0x07;                                                            // pop   es
      *p++ = 0x1F;                                                            // pop   ds
      *p++ = 0x61;                                                            // popa
      *p++ = 0xCB;                                                            // retf
    
      // Execute the thunk that was just assembled.
      f();
    
      return (uint16)pRegs->eax;
    #endif
    }
    
    // Real-mode far address (offset first, then segment) of the XMS driver
    // entry point. Zero until XmsInit() fills it in.
    struct
    {
      uint16 Ip;
      uint16 Cs;
    } XmsEntryPoint = { 0, 0 };
    
    // Probes for an XMS driver: issues INT 0x2F with AX = 0x4300 through RmInt().
    // Returns 1 when the call comes back with AL == 0x80, otherwise 0.
    int XmsSupported(void)
    {
      tDpmiRmInt rm;
    
      memset(&rm, 0, sizeof(rm));
      rm.eax = 0x4300;
    
      RmInt(0x2F, &rm);
    
      if ((rm.eax & 0xFF) == 0x80)
        return 1;
    
      return 0;
    }
    
    void XmsInit(void)
    {
      tDpmiRmInt regs;
    
      memset(&regs, 0, sizeof(regs));
      regs.eax = 0x4310;
      RmInt(0x2F, &regs);
    
      XmsEntryPoint.Cs = regs.es;
      XmsEntryPoint.Ip = (uint16)regs.ebx;
    }
    
    // Queries the XMS driver version (XMS function 00h) through the entry point
    // stored in XmsEntryPoint.
    //
    //   pXmsVer   - if not NULL, receives AX (the XMS version number)
    //   pHimemVer - if not NULL, receives BX (the driver's internal revision)
    //
    // Returns 0. Function 00h defines no error return, and BX carries the
    // driver revision here, so its low byte must not be reported as a status
    // code (doing so returned e.g. 1 for revision 0x301).
    int XmsQueryVersions(uint16* pXmsVer, uint16* pHimemVer)
    {
      tDpmiRmInt regs;
    
      memset(&regs, 0, sizeof(regs));
      regs.eax = 0x00 << 8;                 // AH = function number
      regs.cs = XmsEntryPoint.Cs;
      regs.ip = XmsEntryPoint.Ip;
      RmFarCall(&regs);
    
      if (pXmsVer != NULL)
        *pXmsVer = (uint16)regs.eax;
    
      if (pHimemVer != NULL)
        *pHimemVer = (uint16)regs.ebx;
    
      return 0;
    }
    
    // Queries free extended memory (XMS function 08h) through the entry point
    // stored in XmsEntryPoint.
    //
    //   pLargest - if not NULL, receives AX from the call
    //   pTotal   - if not NULL, receives DX from the call
    //
    // main() prints both values as sizes in KB. Returns BL from the call.
    int XmsQueryFreeMem(uint16* pLargest, uint16* pTotal)
    {
      tDpmiRmInt rm;
    
      memset(&rm, 0, sizeof(rm));
      rm.ip = XmsEntryPoint.Ip;
      rm.cs = XmsEntryPoint.Cs;
      rm.ebx = 0;                           // BL starts out as 0
      rm.eax = 0x08 << 8;                   // AH = function number
    
      RmFarCall(&rm);
    
      if (pTotal != NULL)
        *pTotal = (uint16)rm.edx;
    
      if (pLargest != NULL)
        *pLargest = (uint16)rm.eax;
    
      return (int)(rm.ebx & 0xFF);
    }
    
    // Allocates an extended memory block (XMS function 09h) through the entry
    // point stored in XmsEntryPoint.
    //
    //   pHandle - receives DX from the call (the block handle); must not be NULL
    //   Size    - passed in DX; main() passes 64 for a 64 * 1024-byte buffer
    //
    // Returns BL from the call; main() treats non-zero as failure.
    int XmsAllocMem(uint16* pHandle, uint16 Size)
    {
      tDpmiRmInt rm;
    
      memset(&rm, 0, sizeof(rm));
      rm.ip = XmsEntryPoint.Ip;
      rm.cs = XmsEntryPoint.Cs;
      rm.edx = Size;
      rm.eax = 0x09 << 8;                   // AH = function number
    
      RmFarCall(&rm);
    
      *pHandle = (uint16)rm.edx;
    
      return (int)(rm.ebx & 0xFF);
    }
    
    // Frees the extended memory block identified by Handle (XMS function 0Ah)
    // through the entry point stored in XmsEntryPoint.
    // Returns BL from the call.
    int XmsFreeMem(uint16 Handle)
    {
      tDpmiRmInt rm;
    
      memset(&rm, 0, sizeof(rm));
      rm.ip = XmsEntryPoint.Ip;
      rm.cs = XmsEntryPoint.Cs;
      rm.edx = Handle;
      rm.eax = 0x0A << 8;                   // AH = function number
    
      RmFarCall(&rm);
    
      return (int)(rm.ebx & 0xFF);
    }
    
    // Locks the extended memory block identified by Handle (XMS function 0Ch)
    // and reports its address.
    //
    //   Handle    - block handle obtained from XmsAllocMem()
    //   pPhysAddr - receives the 32-bit address of the locked block (DX:BX) on
    //               success, 0 on failure; must not be NULL
    //
    // Returns 0 on success, non-zero on failure (the driver's error code from
    // BL, or -1 if no error code is available).
    //
    // Success is signalled by AX == 1. On success BX holds the low word of the
    // address, so BL cannot be used as the status: a block whose address has a
    // non-zero low byte would be reported as a failure. BL is an error code
    // only when AX == 0.
    int XmsLockMem(uint16 Handle, uint32* pPhysAddr)
    {
      tDpmiRmInt regs;
      int err;
    
      memset(&regs, 0, sizeof(regs));
      regs.eax = 0x0C << 8;                 // AH = function number
      regs.edx = Handle;
      regs.cs = XmsEntryPoint.Cs;
      regs.ip = XmsEntryPoint.Ip;
      RmFarCall(&regs);
    
      if ((regs.eax & 0xFFFF) == 1)
      {
        *pPhysAddr = ((regs.edx & 0xFFFF) << 16) | (regs.ebx & 0xFFFF);
        return 0;
      }
    
      // Failure (this also covers the far call itself not having run, which
      // leaves AX at the function number rather than 1).
      *pPhysAddr = 0;
      err = (int)(regs.ebx & 0xFF);
    
      return err != 0 ? err : -1;
    }
    
    #if defined(__TURBOC__)
    // Copies Size bytes between XMS blocks and/or conventional memory (XMS
    // function 0Bh) through the entry point stored in XmsEntryPoint. Turbo C
    // build only.
    //
    // The arguments are packed into a byte-packed descriptor whose address is
    // passed in DS:SI. main() passes handle 0 together with a
    // (segment << 16) + offset value to refer to conventional memory.
    //
    // Returns BL from the call; main() treats non-zero as failure.
    int XmsCopyMem(uint16 DstHandle, uint32 DstOffs, uint16 SrcHandle, uint32 SrcOffs, uint32 Size)
    {
      tDpmiRmInt rm;
    #pragma pack(push, 1)
      struct
      {
        uint32 Size;
        uint16 SrcHandle;
        uint32 SrcOffs;
        uint16 DstHandle;
        uint32 DstOffs;
      } move;
    #pragma pack(pop)
    
      // Fill in the move descriptor.
      move.DstOffs   = DstOffs;
      move.DstHandle = DstHandle;
      move.SrcOffs   = SrcOffs;
      move.SrcHandle = SrcHandle;
      move.Size      = Size;
    
      memset(&rm, 0, sizeof(rm));
      rm.ip = XmsEntryPoint.Ip;
      rm.cs = XmsEntryPoint.Cs;
      rm.esi = FP_OFF(&move);
      rm.ds = FP_SEG(&move);
      rm.eax = 0x0B << 8;                   // AH = function number
    
      RmFarCall(&rm);
    
      return (int)(rm.ebx & 0xFF);
    }
    #endif
    
    // Unlocks the extended memory block identified by Handle (XMS function 0Dh)
    // through the entry point stored in XmsEntryPoint.
    // Returns BL from the call.
    int XmsUnlockMem(uint16 Handle)
    {
      tDpmiRmInt rm;
    
      memset(&rm, 0, sizeof(rm));
      rm.ip = XmsEntryPoint.Ip;
      rm.cs = XmsEntryPoint.Cs;
      rm.edx = Handle;
      rm.eax = 0x0D << 8;                   // AH = function number
    
      RmFarCall(&rm);
    
      return (int)(rm.ebx & 0xFF);
    }
    
    #if defined(__WATCOMC__)
    // Maps Size bytes of physical memory starting at PhysAddr into the linear
    // address space using DPMI function 0800h (physical address mapping).
    //
    //   pPtr     - receives the linear address on success, NULL on failure;
    //              must not be NULL
    //   PhysAddr - physical address of the region (passed in BX:CX)
    //   Size     - size of the region in bytes (passed in SI:DI)
    //
    // Returns 0 on success, 1 if the DPMI host reports an error (carry set).
    //
    // The service is invoked directly with int386(0x31, ...). It must not be
    // routed through RmInt(): that helper asks the host to reflect the
    // interrupt to real mode, where INT 31h is not a DPMI service, so the
    // request would never reach the host and the input registers would come
    // back unchanged, looking like a successful identity mapping.
    int DpmiMap(void** pPtr, uint32 PhysAddr, uint32 Size)
    {
      union REGS inregs, outregs;
    
      memset(&inregs, 0, sizeof(inregs));
      memset(&outregs, 0, sizeof(outregs));
    
      inregs.w.ax = 0x800;
      inregs.w.bx = (uint16)(PhysAddr >> 16);
      inregs.w.cx = (uint16)(PhysAddr & 0xFFFF);
      inregs.w.si = (uint16)(Size >> 16);
      inregs.w.di = (uint16)(Size & 0xFFFF);
    
      int386(0x31, &inregs, &outregs);
    
      if (outregs.x.cflag)
      {
        *pPtr = NULL;
        return 1;
      }
    
      // Linear address comes back in BX:CX.
      *pPtr = (void*)(((uint32)outregs.w.bx << 16) | outregs.w.cx);
    
      return 0;
    }
    
    int DpmiUnmap(void* Ptr)
    {
      tDpmiRmInt regs;
    
      memset(&regs, 0, sizeof(regs));
      regs.eax = 0x801;
      regs.ebx = (uint32)Ptr >> 16;
      regs.ecx = (uint32)Ptr & 0xFFFF;
      RmInt(0x31, &regs);
    
      return regs.flags & 1;
    }
    #endif
    
    // Demo driver: detects the XMS driver, allocates and locks a 64 KB block to
    // obtain its physical address, then exercises the block:
    //   - Watcom build: maps the physical address with DpmiMap() and writes and
    //     reads a string through the resulting pointer;
    //   - Turbo C build: copies a string into the block and back with
    //     XmsCopyMem().
    // The block is unlocked and freed before exit. Always returns 0; failures
    // are only reported on stdout.
    int main(void)
    {
      uint16 xmsVer, himemVer;
      uint16 largestFreeSz, totalFreeSz;
      uint16 handle;
      uint32 physAddr;
    
    #if defined(__WATCOMC__)
      {
        // Dump CR0/CR3 so the output shows whether paging is enabled.
        // NOTE(review): reading control registers is a privileged operation;
        // this presumably relies on the extender running the program at a
        // privilege level that allows it - confirm for other DPMI hosts.
        uint32 cr0__ = 0, cr3__ = 0;
        __asm
        {
          mov eax, cr0
          mov cr0__, eax
          mov eax, cr3
          mov cr3__, eax
        }
        printf("CR0: 0x%08lX, CR3: 0x%08lX\n", (ulong)cr0__, (ulong)cr3__);
      }
    #endif
    
      if (!XmsSupported())
      {
        printf("XMS unsupported\n");
        goto Exit;
      }
      printf("XMS supported\n");
    
      XmsInit();
      printf("XMS entry point: 0x%04X:0x%04X\n",
             XmsEntryPoint.Cs, XmsEntryPoint.Ip);
    
      XmsQueryVersions(&xmsVer, &himemVer);
      printf("XMS version: 0x%X  Himem.sys version: 0x%X\n",
             xmsVer, himemVer);
    
      XmsQueryFreeMem(&largestFreeSz, &totalFreeSz);
      printf("Largest free block size: %u KB  Total free memory: %u KB\n",
             largestFreeSz, totalFreeSz);
    
      printf("Allocating the DMA buffer...\n");
      if (XmsAllocMem(&handle, 64))
      {
        printf("Failed to allocate the DMA buffer\n");
        goto Exit;
      }
    
      XmsQueryFreeMem(&largestFreeSz, &totalFreeSz);
      printf("Largest free block size: %u KB  Total free memory: %u KB\n",
             largestFreeSz, totalFreeSz);
    
      printf("Locking the DMA buffer...\n");
      if (XmsLockMem(handle, &physAddr))
      {
        printf("Failed to lock the DMA buffer\n");
      }
      else
      {
        printf("The DMA buffer is at physical address: 0x%08lX\n", (ulong)physAddr);
    
    #if defined(__WATCOMC__)
        {
          // char* (not uint8*) so the pointer can be handed to strcpy() and
          // printf("%s") without a pointer-type mismatch.
          char* ptr;
    
          printf("Mapping the DMA buffer...\n");
    
          if (DpmiMap((void**)&ptr, physAddr, 64 * 1024UL))
          {
            printf("Failed to map the DMA buffer\n");
          }
          else
          {
            printf("The DMA buffer is at virtual address: 0x%08lX\n", (ulong)ptr);
    
            printf("Using the DMA buffer...\n");
            strcpy(ptr, "This is a test string in the DMA buffer.");
            printf("%s\n", ptr);
    
            DpmiUnmap(ptr);
          }
        }
    #elif defined(__TURBOC__)
        {
          static const char msg[] = "This is a test string copied to and from the DMA buffer.";
          // XMS function 0Bh requires an even byte count, and sizeof(msg) is odd
          // (56 characters plus the terminator). The transfer buffer is therefore
          // rounded up to an even size and its full size is what gets copied.
          char testStr[(sizeof(msg) + 1) & ~1U];
    
          memset(testStr, 0, sizeof(testStr));
          strcpy(testStr, msg);
    
          printf("Using the DMA buffer...\n");
          // Handle 0 selects conventional memory; the offset is then seg:off.
          if (XmsCopyMem(handle, 0, 0, ((uint32)FP_SEG(testStr) << 16) + FP_OFF(testStr), sizeof(testStr)))
          {
            printf("Failed to copy to the DMA buffer\n");
          }
          else
          {
            // Wipe the local copy so the printed text provably comes from XMS.
            memset(testStr, 0, sizeof(testStr));
            if (XmsCopyMem(0, ((uint32)FP_SEG(testStr) << 16) + FP_OFF(testStr), handle, 0, sizeof(testStr)))
            {
              printf("Failed to copy from the DMA buffer\n");
            }
            else
            {
              printf("%s\n", testStr);
            }
          }
        }
    #endif
    
        XmsUnlockMem(handle);
      }
    
      // Reached on lock success and on lock failure: the block is allocated
      // either way.
      XmsFreeMem(handle);
    
      XmsQueryFreeMem(&largestFreeSz, &totalFreeSz);
      printf("Largest free block size: %u KB  Total free memory: %u KB\n",
             largestFreeSz, totalFreeSz);
    
    Exit:
    
      return 0;
    }
    

    Sample output (under DosBox):

    CR0: 0x00000001, CR3: 0x00000000
    XMS supported
    XMS entry point: 0xC83F:0x0010
    XMS version: 0x300  Himem.sys version: 0x301
    Largest free block size: 11072 KB  Total free memory: 11072 KB
    Allocating the DMA buffer...
    Largest free block size: 11008 KB  Total free memory: 11008 KB
    Locking the DMA buffer...
    The DMA buffer is at physical address: 0x00530000
    Mapping the DMA buffer...
    The DMA buffer is at virtual address: 0x00530000
    Using the DMA buffer...
    This is a test string in the DMA buffer.
    Largest free block size: 11072 KB  Total free memory: 11072 KB
    

    Note that DOS/32 does not enable page translation (unless there's VCPI). CR0's PG bit is 0, CR3 is 0, and the obtained physical and virtual addresses are identical, all of which is consistent with that. So in this setup the virtual and physical addresses are the same thing.