DirectX C API access via ffi
Tested on OBS Studio version 30.1.0, but should work on earlier versions too.
OBS Studio
Direct3D feature level is 11.0, and shader model version is 4.0.
Now, before we start: why even bother writing raw DirectX calls at all instead of using the libobs graphics wrappers?
While libobs has an extensive list of supported features, it is mostly tailored to composition, filtering and capturing.
So if you want to do some effects that are outside of that paradigm, you may not be able to.
For example, you can't use Vertex Texture Fetch inside a vertex shader. Its resources are not set in the rendering pipeline.
But I will show you how I did it: the end result, not counting the required ffi bindings, is only 4 lines of code in the rendering context.
Lua:
-- Render pass: make the texture bound for the pixel shader also visible to the
-- vertex shader, so the vertex stage can fetch from it.
while S.gs_effect_loop(data.effect, "Custom1") do
  S.gs_load_texture(data.tex1, 0) -- sets pixel shader resource view
  -- Read the view back from pixel-shader slot 0 and bind the same view to vertex-shader slot 0.
  data.PSGetShaderResources(data.pContext2, 0, 1, data.pRes)
  data.VSSetShaderResources(data.pContext2, 0, 1, data.pRes)
  -- PSGetShaderResources adds a reference to every interface it returns, so it must be
  -- released here; otherwise the view's reference count grows by one every frame and the
  -- texture can never be freed. The pipeline keeps its own reference while the view is bound.
  local view = data.pRes[0]
  if view ~= nil then
    -- IUnknown::Release is slot 2 of every COM vtable.
    local release = ffi.cast("unsigned long(__stdcall*)(void*)", ffi.cast("void***", view)[0][2])
    release(view)
    data.pRes[0] = nil
  end
  S.gs_draw_sprite(nil, 0, 2560, 1440)
end
The tricky part is how to define these API calls. Actually it is very straightforward if we know the underlying model that DirectX uses.
COM - or Component Object Model is the main driving force behind it. I will not go deep into it, but here is a good 10/10
overview.
The other important
link is related to modding old games, the solution there is done with LuaJIT ffi and DirectX version 9.
Similarly, libobs provides an almost identical function to get device pointer.
It is technically possible to load d3d11 and create a device yourself, the same approach would work in a standalone interpreter, but I digress.
I'll add correctly indexed and full vtables later, for now let's focus on the glue code.
Lua:
-- Glue code: obtain the D3D11 device from libobs, fetch its immediate context and
-- build callable function pointers for the context methods we need.
-- Methods are looked up by vtable slot: lpVtbl is declared as a pointer to pointers,
-- so lpVtbl[i] reads the i-th pointer-sized slot.
-- NOTE(review): the D3D11 reference declares GetImmediateContext and the
-- *SetShaderResources / *GetShaderResources / *SetSamplers methods as returning void;
-- the "long" results declared below are never read. Confirm before relying on them.
data.device = obsffi.gs_get_device_obj()
data.pDevice = ffi.cast("struct d3ddevice*", data.device)
data.GetImmediateContext = ffi.cast("long(__stdcall*)(void*, void**)", data.pDevice.lpVtbl[40])
-- Out-parameter storage must be pointer-sized. "unsigned long" is only 4 bytes on
-- 64-bit Windows, which is too small for the 8-byte interface pointer written into it.
-- The array is kept in `data` so it stays alive while data.pContext points into it.
data._arg1 = ffi.new("void*[1]")
data.pContext = ffi.cast("void**", data._arg1)
-- Adds a reference to the context; it is dropped again with Release_pContext.
data.GetImmediateContext(data.pDevice, data.pContext)
data.pContext2 = ffi.cast("struct d3ddevicecontext*", data.pContext[0])
-- Release is slot 2 on both interfaces.
data.Release_pContext = ffi.cast("unsigned long(__stdcall*)(void*)", data.pContext2.lpVtbl[2])
data.Release_pDevice = ffi.cast("unsigned long(__stdcall*)(void*)", data.pDevice.lpVtbl[2])
-- Device-context methods, by slot: 25 = VSSetShaderResources, 73 = PSGetShaderResources, 26 = VSSetSamplers.
data.VSSetShaderResources = ffi.cast("long(__stdcall*)(void*, unsigned int, unsigned int, void**)", data.pContext2.lpVtbl[25])
data.PSGetShaderResources = ffi.cast("long(__stdcall*)(void*, unsigned int, unsigned int, void**)", data.pContext2.lpVtbl[73])
data.VSSetSamplers = ffi.cast("long(__stdcall*)(void*, unsigned int, unsigned int, void**)", data.pContext2.lpVtbl[26])
-- One-element, pointer-sized array that receives / supplies a shader resource view pointer.
data._arg2 = ffi.new("void*[1]")
data.pRes = ffi.cast("void**", data._arg2)
As you can see, it looks very similar to the approach in the DirectX 9 link. Experimentally, I found that we must use an index instead of a member name to reach a vtable entry (lpVtbl is declared as a pointer to pointers, so lpVtbl[i] reads the i-th slot).
Now the routine is to find an interesting function and write its binding according to the following template:
-- Binding template: cast vtable slot 25 of the device context to a callable function pointer and keep it in `data`.
-- NOTE(review): the D3D11 reference declares VSSetShaderResources as returning void; the "long" result declared here is never read. Confirm before relying on it.
data.VSSetShaderResources = ffi.cast("long(__stdcall*)(void*, unsigned int, unsigned int, void**)", data.pContext2.lpVtbl[25])
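data.VSSetShaderResources - storing the function pointer in a field of the data table keeps a reference to it, so it is not collected by the garbage collector
"long(__stdcall*)(void*, unsigned int, unsigned int, void**)" - return type, calling convention, then the parameter types; the first parameter is always the object the method is called on (the COM "this" pointer, here data.pContext2), followed by the method's own arguments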
ffi.cast("...", data.pContext2.lpVtbl[25]) - the index in the vtable
GetImmediateContext - MSDN says this will increment the reference count, so we must release it when we are done with it to avoid a memory leak, this is done in the destroy callback.
-- Destroy callback: drop the reference that GetImmediateContext added to the device context.
data.Release_pContext(data.pContext2)
-- The device was obtained from libobs rather than created here, so it is intentionally not released.
--data.Release_pDevice(data.pDevice)
I commented Release_pDevice because we did not create this resource, so I think it is not our responsibility to deal with it.
So this is basically it, the lost device case is left as an exercise for the reader.
From my tests, this final code does not crash during execution or at exit, nor does it leak memory. The
gist features texture reading in a vertex shader that passes the results to the fragment shader, plus a simple color cycle.
Looking forward to what you guys can do with all of this, some ideas :)
- Thread-safe texture reading from GPU to RAM
- Stateful particle system
- D3D11 clear screen, triangle
Vtables. I have found the
Odin programming language vendor sources to be a reliable and readable source of vtable information for DirectX.
Although you can find enum lists and indexes on the net, I think it is fair to include a full description here.
Device
C:
/*
 * Layout of the D3D11 device vtable as consumed through ffi.
 *
 * Every member occupies one pointer-sized slot, and a member's position in
 * the struct is its vtable index (QueryInterface = 0 ... GetExceptionMode = 42).
 * The Lua glue code reaches entries by index, e.g. slot 40 for
 * GetImmediateContext and slot 2 for Release.
 */
struct d3ddeviceVTBL {
    /* slots 0-2: reference counting / interface query */
    void *QueryInterface, *AddRef, *Release;

    /* slots 3-6: buffer and texture creation */
    void *CreateBuffer, *CreateTexture1D, *CreateTexture2D, *CreateTexture3D;

    /* slots 7-10: view creation */
    void *CreateShaderResourceView, *CreateUnorderedAccessView,
         *CreateRenderTargetView, *CreateDepthStencilView;

    /* slots 11-19: input layout, shaders and class linkage */
    void *CreateInputLayout, *CreateVertexShader, *CreateGeometryShader,
         *CreateGeometryShaderWithStreamOutput, *CreatePixelShader,
         *CreateHullShader, *CreateDomainShader, *CreateComputeShader,
         *CreateClassLinkage;

    /* slots 20-23: state objects */
    void *CreateBlendState, *CreateDepthStencilState,
         *CreateRasterizerState, *CreateSamplerState;

    /* slots 24-27: queries, counters and deferred contexts */
    void *CreateQuery, *CreatePredicate, *CreateCounter, *CreateDeferredContext;

    /* slot 28 */
    void *OpenSharedResource;

    /* slots 29-33: capability checks */
    void *CheckFormatSupport, *CheckMultisampleQualityLevels,
         *CheckCounterInfo, *CheckCounter, *CheckFeatureSupport;

    /* slots 34-36: private data */
    void *GetPrivateData, *SetPrivateData, *SetPrivateDataInterface;

    /* slots 37-39: device information */
    void *GetFeatureLevel, *GetCreationFlags, *GetDeviceRemovedReason;

    /* slot 40 */
    void *GetImmediateContext;

    /* slots 41-42: exception mode */
    void *SetExceptionMode, *GetExceptionMode;
};
/*
 * Minimal view of a D3D11 device object: only its leading vtable pointer.
 *
 * lpVtbl is deliberately a pointer to pointers, so that lpVtbl[i] yields the
 * i-th pointer-sized vtable slot; the Lua glue code relies on this indexing
 * (for example lpVtbl[40] for GetImmediateContext).
 */
struct d3ddevice {
    struct d3ddeviceVTBL **lpVtbl;
};
Device context
C:
/*
 * Layout of the D3D11 device-context vtable as consumed through ffi.
 *
 * Every member occupies one pointer-sized slot, and a member's position in
 * the struct is its vtable index; the index is repeated in the trailing
 * comment because the Lua glue code reaches entries by index (for example
 * 25 = VSSetShaderResources, 26 = VSSetSamplers, 73 = PSGetShaderResources).
 *
 * Member names follow the ID3D11DeviceContext declaration order in d3d11.h.
 * Slot 67 is CSSetShaderResources; it must not repeat the name used for
 * slot 63 (DSSetShaderResources), because duplicate member names are an
 * error in C.
 */
struct d3ddevicecontextVTBL {
    void *QueryInterface;                            /*   0 */
    void *AddRef;                                    /*   1 */
    void *Release;                                   /*   2 */
    void *GetDevice;                                 /*   3 */
    void *GetPrivateData;                            /*   4 */
    void *SetPrivateData;                            /*   5 */
    void *SetPrivateDataInterface;                   /*   6 */
    void *VSSetConstantBuffers;                      /*   7 */
    void *PSSetShaderResources;                      /*   8 */
    void *PSSetShader;                               /*   9 */
    void *PSSetSamplers;                             /*  10 */
    void *VSSetShader;                               /*  11 */
    void *DrawIndexed;                               /*  12 */
    void *Draw;                                      /*  13 */
    void *Map;                                       /*  14 */
    void *Unmap;                                     /*  15 */
    void *PSSetConstantBuffers;                      /*  16 */
    void *IASetInputLayout;                          /*  17 */
    void *IASetVertexBuffers;                        /*  18 */
    void *IASetIndexBuffer;                          /*  19 */
    void *DrawIndexedInstanced;                      /*  20 */
    void *DrawInstanced;                             /*  21 */
    void *GSSetConstantBuffers;                      /*  22 */
    void *GSSetShader;                               /*  23 */
    void *IASetPrimitiveTopology;                    /*  24 */
    void *VSSetShaderResources;                      /*  25 */
    void *VSSetSamplers;                             /*  26 */
    void *Begin;                                     /*  27 */
    void *End;                                       /*  28 */
    void *GetData;                                   /*  29 */
    void *SetPredication;                            /*  30 */
    void *GSSetShaderResources;                      /*  31 */
    void *GSSetSamplers;                             /*  32 */
    void *OMSetRenderTargets;                        /*  33 */
    void *OMSetRenderTargetsAndUnorderedAccessViews; /*  34 */
    void *OMSetBlendState;                           /*  35 */
    void *OMSetDepthStencilState;                    /*  36 */
    void *SOSetTargets;                              /*  37 */
    void *DrawAuto;                                  /*  38 */
    void *DrawIndexedInstancedIndirect;              /*  39 */
    void *DrawInstancedIndirect;                     /*  40 */
    void *Dispatch;                                  /*  41 */
    void *DispatchIndirect;                          /*  42 */
    void *RSSetState;                                /*  43 */
    void *RSSetViewports;                            /*  44 */
    void *RSSetScissorRects;                         /*  45 */
    void *CopySubresourceRegion;                     /*  46 */
    void *CopyResource;                              /*  47 */
    void *UpdateSubresource;                         /*  48 */
    void *CopyStructureCount;                        /*  49 */
    void *ClearRenderTargetView;                     /*  50 */
    void *ClearUnorderedAccessViewUint;              /*  51 */
    void *ClearUnorderedAccessViewFloat;             /*  52 */
    void *ClearDepthStencilView;                     /*  53 */
    void *GenerateMips;                              /*  54 */
    void *SetResourceMinLOD;                         /*  55 */
    void *GetResourceMinLOD;                         /*  56 */
    void *ResolveSubresource;                        /*  57 */
    void *ExecuteCommandList;                        /*  58 */
    void *HSSetShaderResources;                      /*  59 */
    void *HSSetShader;                               /*  60 */
    void *HSSetSamplers;                             /*  61 */
    void *HSSetConstantBuffers;                      /*  62 */
    void *DSSetShaderResources;                      /*  63 */
    void *DSSetShader;                               /*  64 */
    void *DSSetSamplers;                             /*  65 */
    void *DSSetConstantBuffers;                      /*  66 */
    void *CSSetShaderResources;                      /*  67 */
    void *CSSetUnorderedAccessViews;                 /*  68 */
    void *CSSetShader;                               /*  69 */
    void *CSSetSamplers;                             /*  70 */
    void *CSSetConstantBuffers;                      /*  71 */
    void *VSGetConstantBuffers;                      /*  72 */
    void *PSGetShaderResources;                      /*  73 */
    void *PSGetShader;                               /*  74 */
    void *PSGetSamplers;                             /*  75 */
    void *VSGetShader;                               /*  76 */
    void *PSGetConstantBuffers;                      /*  77 */
    void *IAGetInputLayout;                          /*  78 */
    void *IAGetVertexBuffers;                        /*  79 */
    void *IAGetIndexBuffer;                          /*  80 */
    void *GSGetConstantBuffers;                      /*  81 */
    void *GSGetShader;                               /*  82 */
    void *IAGetPrimitiveTopology;                    /*  83 */
    void *VSGetShaderResources;                      /*  84 */
    void *VSGetSamplers;                             /*  85 */
    void *GetPredication;                            /*  86 */
    void *GSGetShaderResources;                      /*  87 */
    void *GSGetSamplers;                             /*  88 */
    void *OMGetRenderTargets;                        /*  89 */
    void *OMGetRenderTargetsAndUnorderedAccessViews; /*  90 */
    void *OMGetBlendState;                           /*  91 */
    void *OMGetDepthStencilState;                    /*  92 */
    void *SOGetTargets;                              /*  93 */
    void *RSGetState;                                /*  94 */
    void *RSGetViewports;                            /*  95 */
    void *RSGetScissorRects;                         /*  96 */
    void *HSGetShaderResources;                      /*  97 */
    void *HSGetShader;                               /*  98 */
    void *HSGetSamplers;                             /*  99 */
    void *HSGetConstantBuffers;                      /* 100 */
    void *DSGetShaderResources;                      /* 101 */
    void *DSGetShader;                               /* 102 */
    void *DSGetSamplers;                             /* 103 */
    void *DSGetConstantBuffers;                      /* 104 */
    void *CSGetShaderResources;                      /* 105 */
    void *CSGetUnorderedAccessViews;                 /* 106 */
    void *CSGetShader;                               /* 107 */
    void *CSGetSamplers;                             /* 108 */
    void *CSGetConstantBuffers;                      /* 109 */
    void *ClearState;                                /* 110 */
    void *Flush;                                     /* 111 */
    void *GetType;                                   /* 112 */
    void *GetContextFlags;                           /* 113 */
    void *FinishCommandList;                         /* 114 */
};
/*
 * Minimal view of a D3D11 device-context object: only its leading vtable pointer.
 *
 * lpVtbl is deliberately a pointer to pointers, so that lpVtbl[i] yields the
 * i-th pointer-sized vtable slot; the Lua glue code relies on this indexing
 * (for example lpVtbl[25] for VSSetShaderResources).
 */
struct d3ddevicecontext {
    struct d3ddevicecontextVTBL **lpVtbl;
};