2 // Copyright 2012 Francisco Jerez
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
25 #include "api/util.hpp"
26 #include "core/event.hpp"
27 #include "core/memory.hpp"
29 using namespace clover;
// Shorthand for resource::vector; the helpers below use it for origins,
// regions and pitches.
32 typedef resource::vector vector_t;
// Helper taking a raw size_t pointer.  The entry points below call it on
// the origin/region arrays supplied by the application.
// NOTE(review): the return type and body are not visible in this listing;
// presumably it builds a three-component vector_t from p -- confirm.
35 vector(const size_t *p) {
// Fills in the pitches the caller left unspecified.
//
// Iterates over pairs drawn from tail(pitch) and from the element-wise
// product of region and pitch; whenever the pitch entry is zero it is
// overwritten with the matching product.  pitch is taken by value, so the
// caller's vector is not modified.
// NOTE(review): '®ion' in the signature looks like a mis-encoded
// '&region'.  The return type and return statement are not visible here.
40 pitch(const vector_t ®ion, vector_t pitch) {
41 for (auto x : zip(tail(pitch),
42 map(multiplies(), region, pitch))) {
43 // The spec defines a value of zero as the natural pitch,
44 // i.e. the unaligned size of the previous dimension.
45 if (std::get<0>(x) == 0)
46 std::get<0>(x) = std::get<1>(x);
53 /// Size of a region in bytes.
// Computed as dot(pitch, region - { 0, 1, 1 }).  Assuming dot() is the
// usual inner product, that is the offset of the last row of the last
// slice plus one full row -- TODO confirm.
// NOTE(review): the statement guarded by the zero-component check is not
// visible in this listing; presumably an empty region yields 0.
// '®ion' looks like a mis-encoded '&region'.
56 size(const vector_t &pitch, const vector_t ®ion) {
57 if (any_of(is_zero(), region))
60 return dot(pitch, region - vector_t{ 0, 1, 1 });
64 /// Common argument checking shared by memory transfer commands.
// Throws error(CL_INVALID_CONTEXT) when the predicate "event context
// differs from the queue context" holds.
// NOTE(review): the range the predicate is applied to is on a line not
// visible here; presumably it is deps -- confirm.
67 validate_common(command_queue &q,
68 const ref_vector<event> &deps) {
69 if (any_of([&](const event &ev) {
70 return ev.context() != q.context();
72 throw error(CL_INVALID_CONTEXT);
76 /// Common error checking for a buffer object argument.
79 validate_object(command_queue &q, buffer &mem, const vector_t &origin,
80 const vector_t &pitch, const vector_t ®ion) {
81 if (mem.context() != q.context())
82 throw error(CL_INVALID_CONTEXT);
84 // The region must fit within the specified pitch,
85 if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
86 throw error(CL_INVALID_VALUE);
88 // ...and within the specified object.
89 if (dot(pitch, origin) + size(pitch, origin) > mem.size())
90 throw error(CL_INVALID_VALUE);
92 if (any_of(is_zero(), region))
93 throw error(CL_INVALID_VALUE);
97 /// Common error checking for an image argument.
// Throws error(CL_INVALID_CONTEXT) if img and q belong to different
// contexts, and error(CL_INVALID_VALUE) if any component of orig + region
// exceeds the image dimensions or any component of region is zero.
// NOTE(review): '®ion' looks like a mis-encoded '&region'.
100 validate_object(command_queue &q, image &img,
101 const vector_t &orig, const vector_t ®ion) {
// Image extent as (width, height, depth).
102 vector_t size = { img.width(), img.height(), img.depth() };
104 if (img.context() != q.context())
105 throw error(CL_INVALID_CONTEXT);
// The accessed box must lie inside the image in every dimension.
107 if (any_of(greater(), orig + region, size))
108 throw error(CL_INVALID_VALUE);
110 if (any_of(is_zero(), region))
111 throw error(CL_INVALID_VALUE);
115 /// Common error checking for a host pointer argument.
// Throws error(CL_INVALID_VALUE) when pitch * region exceeds the next
// pitch in any dimension.
// NOTE(review): the condition guarding the first throw is on a line not
// visible here; presumably it rejects a null ptr -- confirm.
// '®ion' looks like a mis-encoded '&region'.
118 validate_object(command_queue &q, const void *ptr, const vector_t &orig,
119 const vector_t &pitch, const vector_t ®ion) {
121 throw error(CL_INVALID_VALUE);
123 // The region must fit within the specified pitch.
124 if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
125 throw error(CL_INVALID_VALUE);
129 /// Common argument checking for a copy between two buffer objects.
// When source and destination are the same buffer, throws
// error(CL_MEM_COPY_OVERLAP) if the two byte intervals
// [offset, offset + size) overlap according to interval_overlaps().
// NOTE(review): the src_mem parameter is declared on a line not visible
// here.  '®ion' looks like a mis-encoded '&region'.
132 validate_copy(command_queue &q, buffer &dst_mem,
133 const vector_t &dst_orig, const vector_t &dst_pitch,
135 const vector_t &src_orig, const vector_t &src_pitch,
136 const vector_t ®ion) {
137 if (dst_mem == src_mem) {
// Byte offsets of both origins.
138 auto dst_offset = dot(dst_pitch, dst_orig);
139 auto src_offset = dot(src_pitch, src_orig);
141 if (interval_overlaps()(
142 dst_offset, dst_offset + size(dst_pitch, region),
143 src_offset, src_offset + size(src_pitch, region)))
144 throw error(CL_MEM_COPY_OVERLAP);
149 /// Common argument checking for a copy between two image objects.
// Throws error(CL_IMAGE_FORMAT_MISMATCH) if the two image formats differ.
// When source and destination are the same image, throws
// error(CL_MEM_COPY_OVERLAP) only if the source and destination intervals
// overlap in every dimension (all_of).
// NOTE(review): '®ion' looks like a mis-encoded '&region'.
152 validate_copy(command_queue &q,
153 image &dst_img, const vector_t &dst_orig,
154 image &src_img, const vector_t &src_orig,
155 const vector_t ®ion) {
156 if (dst_img.format() != src_img.format())
157 throw error(CL_IMAGE_FORMAT_MISMATCH);
159 if (dst_img == src_img) {
160 if (all_of(interval_overlaps(),
161 dst_orig, dst_orig + region,
162 src_orig, src_orig + region))
163 throw error(CL_MEM_COPY_OVERLAP);
168 /// Class that encapsulates the task of mapping an object of type
169 /// \a T. The return value of get() should be implicitly
170 /// convertible to \a void *.
// Generic case: obj is dereferenced to reach its resource for queue q, and
// the result is brace-initialised from (q, resource, flags, true,
// {{ offset }}, {{ size, 1, 1 }}).
// NOTE(review): the template/struct header and the return type of get()
// are not visible in this listing.
175 get(command_queue &q, T obj, cl_map_flags flags,
176 size_t offset, size_t size) {
177 return { q, obj->resource(q), flags, true,
178 {{ offset }}, {{ size, 1, 1 }} };
// Specialisation for a plain host pointer: get() just returns the pointer
// advanced by offset bytes; q, flags and size are unused.
183 struct _map<void *> {
185 get(command_queue &q, void *obj, cl_map_flags flags,
186 size_t offset, size_t size) {
187 return (char *)obj + offset;
// Specialisation for a read-only host pointer: get() returns the pointer
// advanced by offset bytes; q, flags and size are unused.
192 struct _map<const void *> {
194 get(command_queue &q, const void *obj, cl_map_flags flags,
195 size_t offset, size_t size) {
196 return (const char *)obj + offset;
201 /// Software copy from \a src_obj to \a dst_obj. They can be
202 /// either pointers or memory objects.
// Returns a callable taking an event.  The closure captures its arguments
// by copy, except the queue which is captured by reference.
// When invoked it obtains both ends through _map<>::get (destination with
// CL_MAP_WRITE, source with CL_MAP_READ) and walks the region row by row.
// NOTE(review): '®ion' looks like a mis-encoded '&region'.  The
// declaration of v and the name of the per-row copy call are on lines not
// visible here; presumably it is a memcpy -- confirm.
204 template<typename T, typename S>
205 std::function<void (event &)>
206 soft_copy_op(command_queue &q,
207 T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
208 S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
209 const vector_t ®ion) {
210 return [=, &q](event &) {
// Map size(pitch, region) bytes starting at the byte offset of the origin.
211 auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
212 dot(dst_pitch, dst_orig),
213 size(dst_pitch, region));
214 auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
215 dot(src_pitch, src_orig),
216 size(src_pitch, region));
// Outer loop over v[2], inner loop over v[1]; each iteration handles one
// row of src_pitch[0] * region[0] bytes.
219 for (v[2] = 0; v[2] < region[2]; ++v[2]) {
220 for (v[1] = 0; v[1] < region[1]; ++v[1]) {
222 static_cast<char *>(dst) + dot(dst_pitch, v),
223 static_cast<const char *>(src) + dot(src_pitch, v),
224 src_pitch[0] * region[0]);
231 /// Hardware copy from \a src_obj to \a dst_obj.
// Returns a callable taking an event.  When invoked it calls copy() on the
// destination's resource for queue q, passing the source's resource.
// Arguments are captured by copy, except the queue which is captured by
// reference.
// NOTE(review): '®ion' looks like a mis-encoded '&region'.
233 template<typename T, typename S>
234 std::function<void (event &)>
235 hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
236 S src_obj, const vector_t &src_orig, const vector_t ®ion) {
237 return [=, &q](event &) {
238 dst_obj->resource(q).copy(q, dst_orig, region,
239 src_obj->resource(q), src_orig);
// Enqueues a copy of size bytes, starting at offset within buffer d_mem,
// into host memory at ptr.  The copy is expressed as a { size, 1, 1 }
// region with an element pitch of 1.
// NOTE(review): the declaration of q, the handling of the blocking flag,
// the return statement and the catch clause are on lines not visible here.
245 clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
246 size_t offset, size_t size, void *ptr,
247 cl_uint num_deps, const cl_event *d_deps,
248 cl_event *rd_ev) try {
250 auto &mem = obj<buffer>(d_mem);
251 auto deps = objs<wait_list_tag>(d_deps, num_deps);
252 vector_t region = { size, 1, 1 };
253 vector_t obj_origin = { offset };
254 auto obj_pitch = pitch(region, {{ 1 }});
// Argument validation; each helper throws on failure.
256 validate_common(q, deps);
257 validate_object(q, ptr, {}, obj_pitch, region);
258 validate_object(q, mem, obj_origin, obj_pitch, region);
// Destination is the host pointer, source is the buffer.
260 auto hev = create<hard_event>(
261 q, CL_COMMAND_READ_BUFFER, deps,
262 soft_copy_op(q, ptr, {}, obj_pitch,
263 &mem, obj_origin, obj_pitch,
// Hand the event back through rd_ev.
266 ret_object(rd_ev, hev);
// Enqueues a software copy into buffer d_mem at offset, described as a
// { size, 1, 1 } region with an element pitch of 1.
// NOTE(review): the declaration of q, the source arguments of
// soft_copy_op (presumably ptr), the handling of the blocking flag, the
// return statement and the catch clause are on lines not visible here.
274 clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
275 size_t offset, size_t size, const void *ptr,
276 cl_uint num_deps, const cl_event *d_deps,
277 cl_event *rd_ev) try {
279 auto &mem = obj<buffer>(d_mem);
280 auto deps = objs<wait_list_tag>(d_deps, num_deps);
281 vector_t region = { size, 1, 1 };
282 vector_t obj_origin = { offset };
283 auto obj_pitch = pitch(region, {{ 1 }});
// Argument validation; each helper throws on failure.
285 validate_common(q, deps);
286 validate_object(q, mem, obj_origin, obj_pitch, region);
287 validate_object(q, ptr, {}, obj_pitch, region);
// Destination is the buffer.
289 auto hev = create<hard_event>(
290 q, CL_COMMAND_WRITE_BUFFER, deps,
291 soft_copy_op(q, &mem, obj_origin, obj_pitch,
295 ret_object(rd_ev, hev);
// Enqueues a rectangular software copy from buffer d_mem to host memory.
// Origins and region come from the caller's arrays; pitches start from an
// element pitch of 1 and are completed by pitch().
// NOTE(review): the ptr parameter, the declaration of q, the handling of
// the blocking flag, the return statement and the catch clause are on
// lines not visible here.
303 clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
304 const size_t *p_obj_origin,
305 const size_t *p_host_origin,
306 const size_t *p_region,
307 size_t obj_row_pitch, size_t obj_slice_pitch,
308 size_t host_row_pitch, size_t host_slice_pitch,
310 cl_uint num_deps, const cl_event *d_deps,
311 cl_event *rd_ev) try {
313 auto &mem = obj<buffer>(d_mem);
314 auto deps = objs<wait_list_tag>(d_deps, num_deps);
315 auto region = vector(p_region);
316 auto obj_origin = vector(p_obj_origin);
317 auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
318 auto host_origin = vector(p_host_origin);
319 auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
// Argument validation; each helper throws on failure.
321 validate_common(q, deps);
322 validate_object(q, ptr, host_origin, host_pitch, region);
323 validate_object(q, mem, obj_origin, obj_pitch, region);
// Destination is the host pointer, source is the buffer.
325 auto hev = create<hard_event>(
326 q, CL_COMMAND_READ_BUFFER_RECT, deps,
327 soft_copy_op(q, ptr, host_origin, host_pitch,
328 &mem, obj_origin, obj_pitch,
331 ret_object(rd_ev, hev);
// Enqueues a rectangular software copy from host memory into buffer d_mem.
// Origins and region come from the caller's arrays; pitches start from an
// element pitch of 1 and are completed by pitch().
// NOTE(review): the ptr parameter, the declaration of q, the handling of
// the blocking flag, the return statement and the catch clause are on
// lines not visible here.
339 clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
340 const size_t *p_obj_origin,
341 const size_t *p_host_origin,
342 const size_t *p_region,
343 size_t obj_row_pitch, size_t obj_slice_pitch,
344 size_t host_row_pitch, size_t host_slice_pitch,
346 cl_uint num_deps, const cl_event *d_deps,
347 cl_event *rd_ev) try {
349 auto &mem = obj<buffer>(d_mem);
350 auto deps = objs<wait_list_tag>(d_deps, num_deps);
351 auto region = vector(p_region);
352 auto obj_origin = vector(p_obj_origin);
353 auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
354 auto host_origin = vector(p_host_origin);
355 auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
// Argument validation; each helper throws on failure.
357 validate_common(q, deps);
358 validate_object(q, mem, obj_origin, obj_pitch, region);
359 validate_object(q, ptr, host_origin, host_pitch, region);
// Destination is the buffer, source is the host pointer.
361 auto hev = create<hard_event>(
362 q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
363 soft_copy_op(q, &mem, obj_origin, obj_pitch,
364 ptr, host_origin, host_pitch,
367 ret_object(rd_ev, hev);
// Enqueues a copy of size bytes from d_src_mem at src_offset to d_dst_mem
// at dst_offset, using hard_copy_op.  Both sides are described as a
// { size, 1, 1 } region with an element pitch of 1.
// NOTE(review): the declaration of q, the return statement and the catch
// clause are on lines not visible here.
375 clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
376 size_t src_offset, size_t dst_offset, size_t size,
377 cl_uint num_deps, const cl_event *d_deps,
378 cl_event *rd_ev) try {
380 auto &src_mem = obj<buffer>(d_src_mem);
381 auto &dst_mem = obj<buffer>(d_dst_mem);
382 auto deps = objs<wait_list_tag>(d_deps, num_deps);
383 vector_t region = { size, 1, 1 };
384 vector_t dst_origin = { dst_offset };
385 auto dst_pitch = pitch(region, {{ 1 }});
386 vector_t src_origin = { src_offset };
387 auto src_pitch = pitch(region, {{ 1 }});
// Argument validation, including the overlap check for same-buffer copies.
389 validate_common(q, deps);
390 validate_object(q, dst_mem, dst_origin, dst_pitch, region);
391 validate_object(q, src_mem, src_origin, src_pitch, region);
392 validate_copy(q, dst_mem, dst_origin, dst_pitch,
393 src_mem, src_origin, src_pitch, region);
395 auto hev = create<hard_event>(
396 q, CL_COMMAND_COPY_BUFFER, deps,
397 hard_copy_op(q, &dst_mem, dst_origin,
398 &src_mem, src_origin, region));
400 ret_object(rd_ev, hev);
// Enqueues a rectangular copy between two buffers, using soft_copy_op.
// Pitches start from an element pitch of 1 and are completed by pitch().
// NOTE(review): the d_dst_mem parameter, the declaration of q, the return
// statement and the catch clause are on lines not visible here.
408 clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
410 const size_t *p_src_origin, const size_t *p_dst_origin,
411 const size_t *p_region,
412 size_t src_row_pitch, size_t src_slice_pitch,
413 size_t dst_row_pitch, size_t dst_slice_pitch,
414 cl_uint num_deps, const cl_event *d_deps,
415 cl_event *rd_ev) try {
417 auto &src_mem = obj<buffer>(d_src_mem);
418 auto &dst_mem = obj<buffer>(d_dst_mem);
419 auto deps = objs<wait_list_tag>(d_deps, num_deps);
420 auto region = vector(p_region);
421 auto dst_origin = vector(p_dst_origin);
422 auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
423 auto src_origin = vector(p_src_origin);
424 auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});
// Argument validation, including the overlap check for same-buffer copies.
426 validate_common(q, deps);
427 validate_object(q, dst_mem, dst_origin, dst_pitch, region);
428 validate_object(q, src_mem, src_origin, src_pitch, region);
429 validate_copy(q, dst_mem, dst_origin, dst_pitch,
430 src_mem, src_origin, src_pitch, region);
432 auto hev = create<hard_event>(
433 q, CL_COMMAND_COPY_BUFFER_RECT, deps,
434 soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
435 &src_mem, src_origin, src_pitch,
438 ret_object(rd_ev, hev);
// Enqueues a software copy of a region of image d_mem into host memory at
// ptr.  The host side uses the caller's row/slice pitch, the image side
// uses the image's own; both start from the image pixel size.
// NOTE(review): the declaration of q, the handling of the blocking flag,
// the return statement and the catch clause are on lines not visible here.
446 clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
447 const size_t *p_origin, const size_t *p_region,
448 size_t row_pitch, size_t slice_pitch, void *ptr,
449 cl_uint num_deps, const cl_event *d_deps,
450 cl_event *rd_ev) try {
452 auto &img = obj<image>(d_mem);
453 auto deps = objs<wait_list_tag>(d_deps, num_deps);
454 auto region = vector(p_region);
455 auto dst_pitch = pitch(region, {{ img.pixel_size(),
456 row_pitch, slice_pitch }});
457 auto src_origin = vector(p_origin);
458 auto src_pitch = pitch(region, {{ img.pixel_size(),
459 img.row_pitch(), img.slice_pitch() }});
// Argument validation; each helper throws on failure.
461 validate_common(q, deps);
462 validate_object(q, ptr, {}, dst_pitch, region);
463 validate_object(q, img, src_origin, region);
// Destination is the host pointer, source is the image.
465 auto hev = create<hard_event>(
466 q, CL_COMMAND_READ_IMAGE, deps,
467 soft_copy_op(q, ptr, {}, dst_pitch,
468 &img, src_origin, src_pitch,
471 ret_object(rd_ev, hev);
// Enqueues a software copy into a region of image d_mem.  The image side
// uses the image's own row/slice pitch, the host side the caller's; both
// start from the image pixel size.
// NOTE(review): the declaration of q, the source arguments of
// soft_copy_op (presumably ptr), the handling of the blocking flag, the
// return statement and the catch clause are on lines not visible here.
479 clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
480 const size_t *p_origin, const size_t *p_region,
481 size_t row_pitch, size_t slice_pitch, const void *ptr,
482 cl_uint num_deps, const cl_event *d_deps,
483 cl_event *rd_ev) try {
485 auto &img = obj<image>(d_mem);
486 auto deps = objs<wait_list_tag>(d_deps, num_deps);
487 auto region = vector(p_region);
488 auto dst_origin = vector(p_origin);
489 auto dst_pitch = pitch(region, {{ img.pixel_size(),
490 img.row_pitch(), img.slice_pitch() }});
491 auto src_pitch = pitch(region, {{ img.pixel_size(),
492 row_pitch, slice_pitch }});
// Argument validation; each helper throws on failure.
494 validate_common(q, deps);
495 validate_object(q, img, dst_origin, region);
496 validate_object(q, ptr, {}, src_pitch, region);
// Destination is the image.
498 auto hev = create<hard_event>(
499 q, CL_COMMAND_WRITE_IMAGE, deps,
500 soft_copy_op(q, &img, dst_origin, dst_pitch,
504 ret_object(rd_ev, hev);
// Enqueues a copy of a region between two images, using hard_copy_op.
// validate_copy() checks the format match and same-image overlap.
// NOTE(review): the declaration of q, the last argument of hard_copy_op,
// the return statement and the catch clause are on lines not visible here.
512 clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
513 const size_t *p_src_origin, const size_t *p_dst_origin,
514 const size_t *p_region,
515 cl_uint num_deps, const cl_event *d_deps,
516 cl_event *rd_ev) try {
518 auto &src_img = obj<image>(d_src_mem);
519 auto &dst_img = obj<image>(d_dst_mem);
520 auto deps = objs<wait_list_tag>(d_deps, num_deps);
521 auto region = vector(p_region);
522 auto dst_origin = vector(p_dst_origin);
523 auto src_origin = vector(p_src_origin);
// Argument validation; each helper throws on failure.
525 validate_common(q, deps);
526 validate_object(q, dst_img, dst_origin, region);
527 validate_object(q, src_img, src_origin, region);
528 validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);
530 auto hev = create<hard_event>(
531 q, CL_COMMAND_COPY_IMAGE, deps,
532 hard_copy_op(q, &dst_img, dst_origin,
533 &src_img, src_origin,
536 ret_object(rd_ev, hev);
// Enqueues a software copy of an image region into a buffer.  The buffer
// side is addressed with the source image's pixel size as element pitch
// and the remaining pitches completed by pitch().
// NOTE(review): the dst_offset parameter, the middle component of
// src_pitch (presumably the image row pitch), the declaration of q, the
// return statement and the catch clause are on lines not visible here.
544 clEnqueueCopyImageToBuffer(cl_command_queue d_q,
545 cl_mem d_src_mem, cl_mem d_dst_mem,
546 const size_t *p_src_origin, const size_t *p_region,
548 cl_uint num_deps, const cl_event *d_deps,
549 cl_event *rd_ev) try {
551 auto &src_img = obj<image>(d_src_mem);
552 auto &dst_mem = obj<buffer>(d_dst_mem);
553 auto deps = objs<wait_list_tag>(d_deps, num_deps);
554 auto region = vector(p_region);
555 vector_t dst_origin = { dst_offset };
556 auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
557 auto src_origin = vector(p_src_origin);
558 auto src_pitch = pitch(region, {{ src_img.pixel_size(),
560 src_img.slice_pitch() }});
// Argument validation; each helper throws on failure.
562 validate_common(q, deps);
563 validate_object(q, dst_mem, dst_origin, dst_pitch, region);
564 validate_object(q, src_img, src_origin, region);
// Destination is the buffer, source is the image.
566 auto hev = create<hard_event>(
567 q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
568 soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
569 &src_img, src_origin, src_pitch,
572 ret_object(rd_ev, hev);
// Enqueues a software copy from a buffer into an image region.  The buffer
// side is addressed with the destination image's pixel size as element
// pitch and the remaining pitches completed by pitch().
// NOTE(review): the src_offset parameter, the middle component of
// dst_pitch (presumably the image row pitch), the declaration of q, the
// return statement and the catch clause are on lines not visible here.
580 clEnqueueCopyBufferToImage(cl_command_queue d_q,
581 cl_mem d_src_mem, cl_mem d_dst_mem,
583 const size_t *p_dst_origin, const size_t *p_region,
584 cl_uint num_deps, const cl_event *d_deps,
585 cl_event *rd_ev) try {
587 auto &src_mem = obj<buffer>(d_src_mem);
588 auto &dst_img = obj<image>(d_dst_mem);
589 auto deps = objs<wait_list_tag>(d_deps, num_deps);
590 auto region = vector(p_region);
591 auto dst_origin = vector(p_dst_origin);
592 auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
594 dst_img.slice_pitch() }});
595 vector_t src_origin = { src_offset };
596 auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});
// Argument validation; each helper throws on failure.
598 validate_common(q, deps);
599 validate_object(q, dst_img, dst_origin, region);
600 validate_object(q, src_mem, src_origin, src_pitch, region);
// Destination is the image, source is the buffer.
602 auto hev = create<hard_event>(
603 q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
604 soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
605 &src_mem, src_origin, src_pitch,
608 ret_object(rd_ev, hev);
// Maps size bytes of buffer d_mem starting at offset.  The mapping is
// created by add_map() on the buffer's resource during this call; the
// event created afterwards carries no work function.  CL_SUCCESS is
// reported through r_errcode; on the error path the caught error e is
// reported instead.
// NOTE(review): the declaration of q, the return statements and the catch
// clause header are on lines not visible here.
616 clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
617 cl_map_flags flags, size_t offset, size_t size,
618 cl_uint num_deps, const cl_event *d_deps,
619 cl_event *rd_ev, cl_int *r_errcode) try {
621 auto &mem = obj<buffer>(d_mem);
622 auto deps = objs<wait_list_tag>(d_deps, num_deps);
623 vector_t region = { size, 1, 1 };
624 vector_t obj_origin = { offset };
625 auto obj_pitch = pitch(region, {{ 1 }});
// Argument validation; each helper throws on failure.
627 validate_common(q, deps);
628 validate_object(q, mem, obj_origin, obj_pitch, region);
630 void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);
632 ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps));
633 ret_error(r_errcode, CL_SUCCESS);
637 ret_error(r_errcode, e);
// Maps a region of image d_mem.  The mapping is created by add_map() on
// the image's resource during this call; the event created afterwards
// carries no work function.  CL_SUCCESS is reported through r_errcode; on
// the error path the caught error e is reported instead.
// NOTE(review): row_pitch and slice_pitch are not written on any line
// visible here.  The flags parameter, the declaration of q, the return
// statements and the catch clause header are also not visible.
642 clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
644 const size_t *p_origin, const size_t *p_region,
645 size_t *row_pitch, size_t *slice_pitch,
646 cl_uint num_deps, const cl_event *d_deps,
647 cl_event *rd_ev, cl_int *r_errcode) try {
649 auto &img = obj<image>(d_mem);
650 auto deps = objs<wait_list_tag>(d_deps, num_deps);
651 auto region = vector(p_region);
652 auto origin = vector(p_origin);
// Argument validation; each helper throws on failure.
654 validate_common(q, deps);
655 validate_object(q, img, origin, region);
657 void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
659 ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps));
660 ret_error(r_errcode, CL_SUCCESS);
664 ret_error(r_errcode, e);
// Enqueues removal of the mapping at ptr from memory object d_mem.  The
// del_map() call runs inside the event's work function, not during this
// call.  The closure captures q and mem by reference and ptr by copy.
// NOTE(review): the declaration of q and the remainder of the function
// (return statement, catch clause) are not visible in this listing.
669 clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
670 cl_uint num_deps, const cl_event *d_deps,
671 cl_event *rd_ev) try {
673 auto &mem = obj(d_mem);
674 auto deps = objs<wait_list_tag>(d_deps, num_deps);
676 validate_common(q, deps);
678 auto hev = create<hard_event>(
679 q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
680 [=, &q, &mem](event &) {
681 mem.resource(q).del_map(ptr);
684 ret_object(rd_ev, hev);