Why Is OpenGL Window Creation Slow When Using GPU

Hi!

I noticed a significant performance hit when creating an OpenGL window on the discrete GPU instead of the integrated graphics. When I turn on NvOptimusEnablement, simple window creation takes roughly 800 ms longer. Nearly all of that time is spent in ChoosePixelFormat: in the timings below, the cumulative clock jumps from about 5 ms to about 831 ms across that one call. Is there a way to speed this up? Any advice would be awesome.

Here's the benchmark:

Running with integrated graphics:
1
2
3
4
5
6
7
8
9
λ .\opengl.exe |cat
RegisterClassA: 0.08 ms
CreateWindow: 4.64 ms
ChoosePixelFormat: 36.78 ms
SetPixelFormat: 36.94 ms
CreateOpenGLWindow: 36.95 ms
wglCreateContext: 45.76 ms
wglMakeCurrent: 48.87 ms
ShowWindow: 63.41 ms


Running with GPU:
1
2
3
4
5
6
7
8
9
λ .\opengl.exe |cat
RegisterClassA: 0.12 ms
CreateWindow: 5.28 ms
ChoosePixelFormat: 831.04 ms
SetPixelFormat: 831.19 ms
CreateOpenGLWindow: 831.20 ms
wglCreateContext: 831.81 ms
wglMakeCurrent: 877.83 ms
ShowWindow: 901.59 ms


And here's the test file:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// File: opengl.cpp

// Build command (integrated graphics):
// cl -Zi /MD opengl.cpp /link user32.lib opengl32.lib gdi32.lib

// Build command (with GPU):
// cl -Zi /MD -DUSE_GPU opengl.cpp /link user32.lib opengl32.lib gdi32.lib

// Run:
// .\opengl.exe | cat

#include <windows.h>
#include <gl/gl.h>
#include <stdio.h>

// Only compiled in when the program is built with -DUSE_GPU (the "with GPU"
// build command above). The two DWORDs are exported from the executable with
// C linkage so their names are not C++-mangled.
// NOTE(review): NvOptimusEnablement is the switch the benchmark toggles to run
// on the NVIDIA GPU instead of the integrated graphics; the AMD symbol is
// presumably the equivalent request for AMD switchable graphics -- confirm
// against the vendor documentation.
#ifdef USE_GPU
#ifdef __cplusplus
extern "C" {
#endif
__declspec(dllexport) DWORD NvOptimusEnablement = 0x01;
__declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x01;
#ifdef __cplusplus
}
#endif
#endif

// Performance-counter ticks per second; filled in by WinMain from
// QueryPerformanceFrequency before the first DebugTime call.
static unsigned long long GlobalPerfCountFrequency;
// Counter value captured at the very start of WinMain; every DebugTime
// measurement is relative to this instant.
static LARGE_INTEGER GameInitClock;

// Prints "<name>: <elapsed> ms", where <elapsed> is the time since
// GameInitClock. The value is cumulative since startup, NOT the duration of
// the call named by `name`; subtract the previous line to get a per-call cost.
// `name` is stringified and used only as a label.
#define DebugTime(name) printf("%s: %.2f ms\n", #name, Win32GetSecondsElapsed(GameInitClock, Win32GetWallClock()) * 1000)

// Returns the current value of the high-resolution performance counter.
// The result is in raw ticks; pair it with Win32GetSecondsElapsed to get time.
LARGE_INTEGER Win32GetWallClock(void) {
  LARGE_INTEGER Now;

  QueryPerformanceCounter(&Now);

  return Now;
}

// Converts the tick distance between two performance-counter samples into
// seconds, using the tick rate stored in GlobalPerfCountFrequency.
//   Start - earlier counter sample
//   End   - later counter sample
// Returns (End - Start) / GlobalPerfCountFrequency as a double.
double Win32GetSecondsElapsed(LARGE_INTEGER Start, LARGE_INTEGER End) {
  double ElapsedTicks   = (double)(End.QuadPart - Start.QuadPart);
  double TicksPerSecond = (double)GlobalPerfCountFrequency;

  return ElapsedTicks / TicksPerSecond;
}

// Clears the colour buffer and draws one triangle with a red top corner, a
// green bottom-left corner and a blue bottom-right corner, then flushes the
// GL command stream.
void display() {
  // One row per corner: colour (r, g, b) followed by position (x, y).
  static const struct {
    float r, g, b;
    int   x, y;
  } Corners[3] = {
    { 1.0f, 0.0f, 0.0f,  0,  1 },
    { 0.0f, 1.0f, 0.0f, -1, -1 },
    { 0.0f, 0.0f, 1.0f,  1, -1 },
  };
  int Index;

  glClear(GL_COLOR_BUFFER_BIT);

  glBegin(GL_TRIANGLES);
  for (Index = 0; Index < 3; ++Index) {
    // The colour is set before the vertex it applies to.
    glColor3f(Corners[Index].r, Corners[Index].g, Corners[Index].b);
    glVertex2i(Corners[Index].x, Corners[Index].y);
  }
  glEnd();

  glFlush();
}


LONG WINAPI WindowProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam) {
  static PAINTSTRUCT ps;

  switch(uMsg) {
  case WM_PAINT:
    display();
    BeginPaint(hWnd, &ps);
    EndPaint(hWnd, &ps);
    return 0;

  case WM_SIZE:
    glViewport(0, 0, LOWORD(lParam), HIWORD(lParam));
    PostMessage(hWnd, WM_PAINT, 0, 0);
    return 0;

  case WM_CHAR:
    switch (wParam) {
      case 27: {
        ExitProcess(0);
      } break;
    }
    return 0;

  case WM_CLOSE:
    ExitProcess(0);
    return 0;
  }

  return DefWindowProc(hWnd, uMsg, wParam, lParam);
}

// Reports a creation failure to the user and undoes whatever part of the
// window setup had already succeeded. Either handle may be NULL.
// Always returns NULL so callers can write `return FailOpenGLWindow(...)`.
static HWND FailOpenGLWindow(HWND hWnd, HDC hDC, const char* message) {
    if (hDC) {
        ReleaseDC(hWnd, hDC);
    }
    if (hWnd) {
        DestroyWindow(hWnd);
    }
    MessageBoxA(NULL, message, "Error", MB_OK);
    return NULL;
}

// Creates a top-level window of class "OpenGL" and gives it a pixel format
// suitable for OpenGL rendering. The window class is registered on the first
// successful call only.
//
//   title         - window caption (ANSI string)
//   x, y          - initial position passed to CreateWindow
//   width, height - initial size passed to CreateWindow
//   type          - pixel type stored in PIXELFORMATDESCRIPTOR::iPixelType
//   flags         - extra PFD_* flags OR-ed with
//                   PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL
//
// Returns the new window handle, or NULL on failure. On failure a message box
// describes the failing step and any partially created window is destroyed.
// Progress timestamps are printed through DebugTime after each major step.
HWND CreateOpenGLWindow(const char* title, int x, int y, int width, int height, BYTE type, DWORD flags) {
    // Set only after RegisterClassA succeeds, so a failed registration is
    // retried on the next call instead of being silently skipped.
    static BOOL classRegistered = FALSE;

    HINSTANCE hInstance = GetModuleHandle(NULL);

    if (!classRegistered) {
        // The ANSI structure is named explicitly to match RegisterClassA,
        // so the code also builds when UNICODE is defined.
        WNDCLASSA wc;
        wc.style         = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
        wc.lpfnWndProc   = (WNDPROC)WindowProc;
        wc.cbClsExtra    = 0;
        wc.cbWndExtra    = 0;
        wc.hInstance     = hInstance;
        wc.hIcon         = LoadIcon(NULL, IDI_WINLOGO);
        wc.hCursor       = LoadCursor(NULL, IDC_ARROW);
        wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH);
        wc.lpszMenuName  = NULL;
        wc.lpszClassName = "OpenGL";

        if (!RegisterClassA(&wc)) {
            return FailOpenGLWindow(NULL, NULL, "RegisterClass() failed:  "
                "Cannot register window class.");
        }
        classRegistered = TRUE;
        DebugTime(RegisterClassA);
    }

    HWND hWnd = CreateWindowA(
      "OpenGL", title, WS_OVERLAPPEDWINDOW | WS_CLIPSIBLINGS | WS_CLIPCHILDREN,
      x, y, width, height, NULL, NULL, hInstance, NULL);
    DebugTime(CreateWindow);

    if (hWnd == NULL) {
        return FailOpenGLWindow(NULL, NULL,
            "CreateWindow() failed:  Cannot create a window.");
    }

    HDC hDC = GetDC(hWnd);
    if (hDC == NULL) {
        return FailOpenGLWindow(hWnd, NULL,
            "GetDC() failed:  Cannot get a device context.");
    }

    // Zero everything first; only the fields that matter are filled in.
    PIXELFORMATDESCRIPTOR pfd = {0};
    pfd.nSize        = sizeof(pfd);
    pfd.nVersion     = 1;
    pfd.dwFlags      = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | flags;
    pfd.iPixelType   = type;
    pfd.cColorBits   = 32;

    int pf = ChoosePixelFormat(hDC, &pfd);
    DebugTime(ChoosePixelFormat);
    if (pf == 0) {
        return FailOpenGLWindow(hWnd, hDC, "ChoosePixelFormat() failed:  "
            "Cannot find a suitable pixel format.");
    }

    if (SetPixelFormat(hDC, pf, &pfd) == FALSE) {
        return FailOpenGLWindow(hWnd, hDC, "SetPixelFormat() failed:  "
            "Cannot set format specified.");
    }
    DebugTime(SetPixelFormat);

    ReleaseDC(hWnd, hDC);

    return hWnd;
}

// Program entry point: times each step of bringing up an OpenGL window.
//
// Records the start time, creates the window, creates a WGL context and makes
// it current, shows the window, and then pumps messages. A cumulative
// timestamp is printed through DebugTime after every step.
//
// Returns 0 when the message loop ends normally and 1 if any setup step or
// the message loop itself fails.
int APIENTRY WinMain(HINSTANCE hCurrentInst, HINSTANCE hPreviousInst, LPSTR lpszCmdLine, int nCmdShow) {
    (void)hCurrentInst;
    (void)hPreviousInst;
    (void)lpszCmdLine;

    // The start time is sampled first so that it covers everything below.
    GameInitClock = Win32GetWallClock();

    {
        LARGE_INTEGER PerfCountFrequencyResult;
        QueryPerformanceFrequency(&PerfCountFrequencyResult);
        GlobalPerfCountFrequency = PerfCountFrequencyResult.QuadPart;
    }

    HWND hWnd = CreateOpenGLWindow("minimal", 0, 0, 1280, 720, PFD_TYPE_RGBA, 0);
    DebugTime(CreateOpenGLWindow);

    if (hWnd == NULL) {
        return 1;
    }

    HDC hDC = GetDC(hWnd);
    HGLRC hRC = wglCreateContext(hDC);
    DebugTime(wglCreateContext);

    if (hRC == NULL) {
        MessageBoxA(NULL, "wglCreateContext() failed:  "
            "Cannot create an OpenGL rendering context.", "Error", MB_OK);
        return 1;
    }

    BOOL madeCurrent = wglMakeCurrent(hDC, hRC);
    DebugTime(wglMakeCurrent);

    if (!madeCurrent) {
        MessageBoxA(NULL, "wglMakeCurrent() failed:  "
            "Cannot activate the OpenGL rendering context.", "Error", MB_OK);
        wglDeleteContext(hRC);
        return 1;
    }

    ShowWindow(hWnd, nCmdShow);
    DebugTime(ShowWindow);

    // stdout is typically piped (".\opengl.exe | cat"), so push the timings
    // out before blocking in the message loop.
    fflush(stdout);

    // GetMessage returns >0 for a normal message, 0 for WM_QUIT and -1 on
    // error. Testing "> 0" stops on both, where a bare truth test would spin
    // forever on -1. The window filter is NULL so the loop cannot fail merely
    // because hWnd has been destroyed.
    MSG msg;
    BOOL status;
    while ((status = GetMessage(&msg, NULL, 0, 0)) > 0) {
        TranslateMessage(&msg);
        DispatchMessage(&msg);
    }

    // Release the context before leaving.
    wglMakeCurrent(NULL, NULL);
    wglDeleteContext(hRC);

    return (status < 0) ? 1 : 0;
}


For context this is the graphics card I'm using: NVIDIA Corporation GeForce MX150/PCIe/SSE2

Also wondering if it's possible to call a function during program run-time that has a similar effect to NvOptimusEnablement? That way I could at least shift the perceived startup latency to a different point (e.g. after showing a loading screen). The only thing I can think of is to have two programs: one launcher that quickly displays a splash screen and then calls out to the main program executable. Ideally I would like these to be one executable (for simplicity)!

Thanks,

- Nick

Edited by nickav on
This looks like exactly the same problem as the one discussed here: https://hero.handmade.network/for...ls_pixel_format_takes_a_long_time

As far as I know, there is no way on Windows to enable NvOptimusEnablement at runtime. This is why Direct3D is much more reliable on Windows: there you can explicitly choose which GPU adapter to use for your D3D device.