I noticed a significant performance hit when creating an OpenGL window on the discrete GPU instead of the integrated graphics card. When I turn on NvOptimusEnablement, I get roughly an 800 ms penalty for simple window creation. It seems like all of the time is spent in ChoosePixelFormat. Is there a way to speed this up? Any advice would be awesome.
Here's the benchmark:
Running with integrated graphics:
λ .\opengl.exe |cat
RegisterClassA: 0.08 ms
CreateWindow: 4.64 ms
ChoosePixelFormat: 36.78 ms
SetPixelFormat: 36.94 ms
CreateOpenGLWindow: 36.95 ms
wglCreateContext: 45.76 ms
wglMakeCurrent: 48.87 ms
ShowWindow: 63.41 ms
Running with GPU:
λ .\opengl.exe |cat
RegisterClassA: 0.12 ms
CreateWindow: 5.28 ms
ChoosePixelFormat: 831.04 ms
SetPixelFormat: 831.19 ms
CreateOpenGLWindow: 831.20 ms
wglCreateContext: 831.81 ms
wglMakeCurrent: 877.83 ms
ShowWindow: 901.59 ms
And here's the test file:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | // File: opengl.cpp // Build command (integrated graphics): // cl -Zi /I /MD opengl.cpp /link user32.lib opengl32.lib gdi32.lib // Build command (with GPU): // cl -Zi /I /MD -DUSE_GPU opengl.cpp /link user32.lib opengl32.lib gdi32.lib // Run: // .\opengl.exe | cat #include <windows.h> #include <gl/gl.h> #include <stdio.h> #ifdef USE_GPU #ifdef __cplusplus extern "C" { #endif __declspec(dllexport) DWORD NvOptimusEnablement = 0x01; __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x01; #ifdef __cplusplus } #endif #endif static unsigned long long GlobalPerfCountFrequency; static LARGE_INTEGER GameInitClock; #define DebugTime(name) printf("%s: %.2f ms\n", #name, Win32GetSecondsElapsed(GameInitClock, Win32GetWallClock()) * 1000) LARGE_INTEGER Win32GetWallClock(void) { LARGE_INTEGER Result; QueryPerformanceCounter(&Result); return Result; } double Win32GetSecondsElapsed(LARGE_INTEGER Start, LARGE_INTEGER End) { double Result = ((double)(End.QuadPart - Start.QuadPart) / (double)GlobalPerfCountFrequency); return Result; } void display() { glClear(GL_COLOR_BUFFER_BIT); glBegin(GL_TRIANGLES); glColor3f(1.0f, 0.0f, 0.0f); glVertex2i(0, 1); glColor3f(0.0f, 1.0f, 0.0f); glVertex2i(-1, -1); glColor3f(0.0f, 0.0f, 1.0f); glVertex2i(1, -1); glEnd(); glFlush(); } LONG WINAPI WindowProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM 
lParam) { static PAINTSTRUCT ps; switch(uMsg) { case WM_PAINT: display(); BeginPaint(hWnd, &ps); EndPaint(hWnd, &ps); return 0; case WM_SIZE: glViewport(0, 0, LOWORD(lParam), HIWORD(lParam)); PostMessage(hWnd, WM_PAINT, 0, 0); return 0; case WM_CHAR: switch (wParam) { case 27: { ExitProcess(0); } break; } return 0; case WM_CLOSE: ExitProcess(0); return 0; } return DefWindowProc(hWnd, uMsg, wParam, lParam); } HWND CreateOpenGLWindow(char* title, int x, int y, int width, int height, BYTE type, DWORD flags) { static HINSTANCE hInstance = 0; if (!hInstance) { hInstance = GetModuleHandle(NULL); WNDCLASS wc; wc.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC; wc.lpfnWndProc = (WNDPROC)WindowProc; wc.cbClsExtra = 0; wc.cbWndExtra = 0; wc.hInstance = hInstance; wc.hIcon = LoadIcon(NULL, IDI_WINLOGO); wc.hCursor = LoadCursor(NULL, IDC_ARROW); wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH); wc.lpszMenuName = NULL; wc.lpszClassName = "OpenGL"; if (!RegisterClassA(&wc)) { MessageBox(NULL, "RegisterClass() failed: " "Cannot register window class.", "Error", MB_OK); return NULL; } DebugTime(RegisterClassA); } HWND hWnd = CreateWindow( "OpenGL", title, WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | WS_CLIPCHILDREN, x, y, width, height, NULL, NULL, hInstance, NULL); DebugTime(CreateWindow); if (hWnd == NULL) { MessageBox(NULL, "CreateWindow() failed: Cannot create a window.", "Error", MB_OK); return NULL; } HDC hDC = GetDC(hWnd); PIXELFORMATDESCRIPTOR pfd; memset(&pfd, 0, sizeof(pfd)); pfd.nSize = sizeof(pfd); pfd.nVersion = 1; pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | flags; pfd.iPixelType = type; pfd.cColorBits = 32; int pf = ChoosePixelFormat(hDC, &pfd); DebugTime(ChoosePixelFormat); if (pf == 0) { MessageBox(NULL, "ChoosePixelFormat() failed: " "Cannot find a suitable pixel format.", "Error", MB_OK); return 0; } if (SetPixelFormat(hDC, pf, &pfd) == FALSE) { MessageBox(NULL, "SetPixelFormat() failed: " "Cannot set format specified.", "Error", MB_OK); return 0; } 
DebugTime(SetPixelFormat); ReleaseDC(hWnd, hDC); return hWnd; } int APIENTRY WinMain(HINSTANCE hCurrentInst, HINSTANCE hPreviousInst, LPSTR lpszCmdLine, int nCmdShow) { GameInitClock = Win32GetWallClock(); { LARGE_INTEGER PerfCountFrequencyResult; QueryPerformanceFrequency(&PerfCountFrequencyResult); GlobalPerfCountFrequency = PerfCountFrequencyResult.QuadPart; } HWND hWnd = CreateOpenGLWindow("minimal", 0, 0, 1280, 720, PFD_TYPE_RGBA, 0); DebugTime(CreateOpenGLWindow); if (hWnd == NULL) exit(1); HDC hDC = GetDC(hWnd); HGLRC hRC = wglCreateContext(hDC); DebugTime(wglCreateContext); wglMakeCurrent(hDC, hRC); DebugTime(wglMakeCurrent); ShowWindow(hWnd, nCmdShow); DebugTime(ShowWindow); fflush(stdout); MSG msg; while (GetMessage(&msg, hWnd, 0, 0)) { TranslateMessage(&msg); DispatchMessage(&msg); } return 0; } |
For context this is the graphics card I'm using: NVIDIA Corporation GeForce MX150/PCIe/SSE2
Also wondering if it's possible to call a function during program run-time that has a similar effect to NvOptimusEnablement? That way I could at least shift the perceived startup latency to a different point (e.g. after showing a loading screen). The only thing I can think of is to have two programs: one launcher that quickly displays a splash screen and then calls out to the main program executable. Ideally I would like these to be one executable (for simplicity)!
Thanks,
- Nick