Implementing copy/paste in X11

I've been struggling with this for some time, so when I finally figured it out, I thought it would be nice to write the tutorial I wish I had had.

Each window in the X server contains a list of properties, which are key-value pairs. The keys are numbers with an associated name, called Atoms. You can get an Atom from its name via a function called XInternAtom, and you can read a property's value using a function called XGetWindowProperty. Every Xlib call also takes a Display, which represents the connection to the X server.

We can start by creating a window and a display,

// Open the display and create the window used below both as the selection
// owner (XSetSelectionOwner) and as the requestor (XConvertSelection).
// The window is a child of the root window; nothing shown here maps it.
Display* display = XOpenDisplay(NULL);
Window window = XCreateSimpleWindow(display, DefaultRootWindow(display), 0, 0, 100, 100, 0, 0, 0);

... define some Atoms:

// Atoms looked up by name once and reused by the event handlers below.
// CLIPBOARD is the selection we own and request, and it also names the
// property on our window where replies are stored.
Atom CLIPBOARD = XInternAtom(display , "CLIPBOARD", False);
// TARGETS is the target used to ask an owner which formats it offers.
Atom TARGETS = XInternAtom(display , "TARGETS", False);
// UTF8_STRING is the only format this tutorial's owner side serves.
Atom UTF8_STRING = XInternAtom(display, "UTF8_STRING", False);
// Format picked from the owner's TARGETS list; None until one is chosen.
Atom target = None;

... and set the data to be copied.

// Bytes handed to other clients when they paste from us.
unsigned char copy_data[] = "Hello X11";
// Payload length in bytes, excluding the terminating NUL. It must be measured
// on copy_data itself (the original referred to an undeclared `data`).
unsigned long copy_length = sizeof(copy_data) - 1;

Next, we would like to inform the X server that we want to be the owner of the selection. That means that whenever some window wants to paste the data, the server will let it know that it should talk to us.

// Claim the CLIPBOARD selection for our window; the SelectionRequest handler
// below re-checks ownership with XGetSelectionOwner before answering.
XSetSelectionOwner(display, CLIPBOARD, window, CurrentTime);

Next, someone has to request the data. Let us be that someone, but first we must ask for a list of available data formats, known as targets.

// Ask the CLIPBOARD owner for its TARGETS list. The fourth argument names the
// property on `window` that receives the reply; the SelectionNotify handler
// reads it back from that same CLIPBOARD property.
XConvertSelection(display, CLIPBOARD, TARGETS, CLIPBOARD, window, CurrentTime);

So a SelectionRequest event is fired at us asking for the list of targets. We only support UTF-8 strings, so we will answer with a list of length 1.

This is going to be our event loop:

// Event loop, first version: acting as selection owner, it answers TARGETS
// requests only.
while (true) {
	XEvent event;
	XNextEvent(display, &event);
	switch (event.type) {
		case SelectionRequest: {
			XSelectionRequestEvent request = event.xselectionrequest;	
			// Answer only while our window still owns CLIPBOARD and the request is for that selection.
			if (XGetSelectionOwner(display, CLIPBOARD) == window && request.selection == CLIPBOARD) {
				if (request.target == TARGETS && request.property != None) {
					// Store a one-element atom list (UTF8_STRING) in the property the requestor named.
					XChangeProperty(request.display, request.requestor, request.property, 
						XA_ATOM, 32, PropModeReplace, (unsigned char*)&UTF8_STRING, 1);
					// Build the SelectionNotify reply by echoing the request's fields back.
					XSelectionEvent sendEvent;
					sendEvent.type = SelectionNotify;
					sendEvent.serial = request.serial;
					sendEvent.send_event = request.send_event;
					sendEvent.display = request.display;
					sendEvent.requestor = request.requestor;
					sendEvent.selection = request.selection;
					sendEvent.target = request.target;
					sendEvent.property = request.property;
					sendEvent.time = request.time;
					XSendEvent(display, request.requestor, 0, 0, (XEvent*)&sendEvent);
				}
				// NOTE(review): in this version a request for any other target gets no reply at all.
			}
		} break;
	}
}

We will now get a SelectionNotify event telling us that the list of targets is ready, and it is up to us to choose a target from the list.

// Requestor side, first version: handles the reply to our TARGETS request by
// reading the list of offered formats and asking for the best text format.
case SelectionNotify: {
	XSelectionEvent selection = event.xselection;
	// A property of None means the owner refused or could not perform the conversion.
	if (selection.property != None) {
		Atom actualType = None;
		int actualFormat = 0;
		unsigned long bytesAfter = 0;
		unsigned long count = 0;
		// Start at NULL so the XFree below is safe even if the read fails without setting it.
		unsigned char* data = NULL;
		// Read the property named in the event rather than assuming it is CLIPBOARD.
		int status = XGetWindowProperty(display, window, selection.property, 0, LONG_MAX, False, AnyPropertyType,
			&actualType, &actualFormat, &count, &bytesAfter, &data);

		if (status == Success && data && selection.target == TARGETS) {
			// Forget any choice left over from an earlier negotiation.
			target = None;
			// Only a format-32 property can be read as an array of Atoms.
			if (actualFormat == 32) {
				Atom* list = (Atom*)data;
				for (unsigned long i = 0; i < count; i++) {
					if (list[i] == XA_STRING) {
						// Fallback; keep scanning in case UTF8_STRING is also offered.
						target = XA_STRING;
					}
					else if (list[i] == UTF8_STRING) {
						// Preferred format; stop looking.
						target = UTF8_STRING;
						break;
					}
				}
			}
			if (target != None)
				XConvertSelection(display, CLIPBOARD, target, CLIPBOARD, window, CurrentTime);
		}
		if (data) XFree(data);
	}
} break;

Here I used XA_STRING as a fallback; it represents a Latin-1 (ISO 8859-1) string, and if UTF8_STRING is not offered as an option, it will have to do. Technically, when retrieving window properties you are supposed to do so in a loop, like reading from a file, but here I'm cheesing it by passing LONG_MAX as the length instead.

Next the window will get a SelectionRequest event with the chosen target, and finally we deliver the data.

case SelectionRequest: {
	XSelectionRequestEvent request = event.xselectionrequest;	
	if (XGetSelectionOwner(display, CLIPBOARD) == window && request.selection == CLIPBOARD) {
		if (request.target == TARGETS && request.property != None) {
			XChangeProperty(request.display, request.requestor, request.property, 
				XA_ATOM, 32, PropModeReplace, (unsigned char*)&UTF8_STRING, 1);
		else if (request.target == UTF8_STRING && request.property != None) {
			XChangeProperty(request.display, request.requestor, request.property, 
				request.target, 8, PropModeReplace, copy_data, copy_length);
		}
		XSelectionEvent sendEvent;
		sendEvent.type = SelectionNotify;
		sendEvent.serial = request.serial;
		sendEvent.send_event = request.send_event;
		sendEvent.display = request.display;
		sendEvent.requestor = request.requestor;
		sendEvent.selection = request.selection;
		sendEvent.target = request.target;
		sendEvent.property = request.property;
		sendEvent.time = request.time;
		XSendEvent(display, request.requestor, 0, 0, (XEvent*)&sendEvent);
	}
} break;

Now all that is left is to receive the data:

// Requestor side, final version: handles both replies we asked for, first the
// TARGETS list (choose a format and request it), then the data in that format.
case SelectionNotify: {
	XSelectionEvent selection = event.xselection;
	// A property of None means the owner refused or could not perform the conversion.
	if (selection.property != None) {
		Atom actualType = None;
		int actualFormat = 0;
		unsigned long bytesAfter = 0;
		unsigned long count = 0;
		// Start at NULL so the XFree below is safe even if the read fails without setting it.
		unsigned char* data = NULL;
		// Read the property named in the event rather than assuming it is CLIPBOARD.
		int status = XGetWindowProperty(display, window, selection.property, 0, LONG_MAX, False, AnyPropertyType,
			&actualType, &actualFormat, &count, &bytesAfter, &data);

		if (status == Success && data) {
			if (selection.target == TARGETS) {
				// Forget any choice left over from an earlier negotiation.
				target = None;
				// Only a format-32 property can be read as an array of Atoms.
				if (actualFormat == 32) {
					Atom* list = (Atom*)data;
					for (unsigned long i = 0; i < count; i++) {
						if (list[i] == XA_STRING) {
							// Fallback; keep scanning in case UTF8_STRING is also offered.
							target = XA_STRING;
						}
						else if (list[i] == UTF8_STRING) {
							// Preferred format; stop looking.
							target = UTF8_STRING;
							break;
						}
					}
				}
				if (target != None)
					XConvertSelection(display, CLIPBOARD, target, CLIPBOARD, window, CurrentTime);
			}
			else if (selection.target == target) {
				// the data is in {data, count}
				// do whatever with it, I personally use a callback function here.
				// It is freed right after this block, so copy anything that must outlive it.
			}
		}
		if (data) XFree(data);
	}
} break;

And that's it.

Some closing notes regarding platform layer abstraction: In win32 you can have a single function that queries the clipboard and returns the pasted data; it is not possible to do the same with X11. For one thing, it takes several messages of communication between applications/windows, and waiting for a response can take several frames even if everything goes smoothly. For another, you can't trust the other app to even respond at all, so you may find yourself in an infinite loop waiting for a response. The way I chose to do it is that, instead of returning the data, I ask for a callback function, and this allows me to abstract over both win32 and X11.


Edited by eternalStudent on

Thanks.

Working with X11 can quickly become neck-beard inducing. Thank you for saving us all in this particular area.

I was just planning to implement that, so this will certainly be useful information.

There is one important thing missing from this. If an application owns the clipboard data and then terminates before the user has had a chance to paste it, the clipboard contents are lost, because other applications no longer have an owner to ask for them. Users will get frustrated: they copied the text, closed the application, and cannot paste it.

That's why you should push the ownership of clipboard to clipboard manager when you're terminating. glfw does this with code here: https://github.com/glfw/glfw/blob/d299d9f78857e921b66bdab42c7ea27fe2e31810/src/x11_window.c#L1878-L1920 This function is called from glfwTerminate().

More information here: https://www.freedesktop.org/wiki/ClipboardManager/


Edited by Mārtiņš Možeiko on

I have a tutorial like this for X11, Cocoa, and Winapi.

I'm going to put a link to it here because I think it's a slightly better resource. It goes into a bit more detail and links to relevant documentation.

https://github.com/ColleagueRiley/Clipboard-Copy-Paste