QIType m_Type;
+ QTheoraCSC_t* m_csc;
+
INT32 m_Stride;
INT32 m_Rasters;
QImageConverter() :
m_Type(IT_Unknown),
+ m_csc(0), /* placeholder value; the converter itself is allocated in the constructor body */
m_Stride(0),
m_Rasters(0),
m_x(0),
m_cx(0),
m_cy(0)
{
+ m_csc = QT_CreateCSC(); /* QT_CreateCSC() returns NULL when its allocation fails. NOTE(review): that case is not checked here */
+ }
+ /* Releases the converter obtained from QT_CreateCSC() in the constructor;
+  * QT_ReleaseCSC() accepts a NULL pointer.
+  * NOTE(review): the class now owns a raw pointer; if instances can be copied,
+  * both copies would release the same converter - confirm copying is not used. */
+ ~QImageConverter()
+ {
+ QT_ReleaseCSC(m_csc);
}
void Setup(
m_cx = rc->right - rc->left;
m_cy = rc->bottom - rc->top;
+ /* Select the conversion routine that matches the output type. Only IT_YV12
+  * is handled by this switch; for any other type QT_SetupCSC() is not called.
+  * NOTE(review): the BOOL result of QT_SetupCSC() is ignored. */
+ switch (type) {
+ case IT_YV12:
+ QT_SetupCSC(m_csc, QTCS_YV12);
+ break;
+ }
}
+#if 0 /* NOTE(review): Fill() is compiled out by this change; if it is no longer needed, delete it instead of keeping disabled code */
void Fill(
VOID* pv,
INT32 val)
} // switch
}
+#endif
void Copy(
VOID* pv,
const QT_Output* frame)
{
+ QT_Frame_t q = {
+ pv,
+ m_Stride,
+ m_Rasters,
+ m_x,
+ m_y,
+ m_cx,
+ m_cy
+ };
+ /* q describes the destination for the converter: base pointer, pitch,
+  * raster count, then the target rectangle (x, y, width, height). The
+  * initialiser is positional and follows the member order of struct QT_Frame.
+  * The hand-written YV12 copy below is replaced by a call into the converter
+  * selected in Setup(). */
switch (m_Type) {
case IT_YV12:
- {
- UINT8* pb0 = static_cast<UINT8*>(pv);
- UINT8* pb1 = pb0 + m_Rasters * m_Stride;
- UINT8* pb2 = pb1 + m_Rasters * m_Stride / 4;
-
- pb0 += m_y * m_Stride + m_x;
- pb1 += m_y * m_Stride / 2 + m_x / 2;
- pb2 += m_y * m_Stride / 2 + m_x / 2;
-
- const UINT8* s0 = frame->Plane[0] + (frame->CY - 1) * frame->CX;
- const UINT8* s1 = frame->Plane[2] + (frame->CY / 2 - 1) * frame->CX / 2;
- const UINT8* s2 = frame->Plane[1] + (frame->CY / 2 - 1) * frame->CX / 2;
-
- UINT8* end = pb0 + m_cy * m_Stride;
- while (pb0 < end) {
- memcpy(pb0, s0, m_cx);
- pb0 += m_Stride;
- s0 -= frame->CX;
- }
-
- end = pb1 + (m_cy / 2) * (m_Stride / 2);
- while (pb1 < end) {
- memcpy(pb1, s1, m_cx);
- pb1 += m_Stride / 2;
- s1 -= frame->CX / 2;
- }
-
- end = pb2 + (m_cy / 2) * (m_Stride / 2);
- while (pb2 < end) {
- memcpy(pb2, s2, m_cx);
- pb2 += m_Stride / 2;
- s2 -= frame->CX / 2;
- }
-
+ QT_ConvertFrame(m_csc, frame, &q); /* NOTE(review): returns FALSE when the decoded frame is larger than the target rectangle; the result is ignored here */
break;
- }
} // switch
}
--- /dev/null
+/* CSConverter.c */
+/* 2009/06/19 */
+
+#include "StdAfx.h"
+
+#include "TheoraDecoder.h"
+
+#include "CSConverter.h"
+
+/* */
+
+/*
+ * QT_CSConvert_YV12
+ *
+ * Copies the three decoded planes in `output` into a YV12 destination
+ * described by `frame`, using memcpy row by row.
+ *
+ * Destination layout derived from `frame`: a luma plane of Rasters rows of
+ * Pitch bytes, followed by two chroma planes of Rasters * Pitch / 4 bytes
+ * each, written with a row pitch of Pitch / 2. output->Plane[2] goes to the
+ * first chroma plane and output->Plane[1] to the second.
+ *
+ * Source rows are read from the last row upwards, so the destination receives
+ * the rows in reverse vertical order.
+ *
+ * (frame->X, frame->Y) is the top-left position, in luma samples, at which
+ * the image is placed. The chroma planes have half the vertical resolution,
+ * so their row offset is (Y / 2) rows of (Pitch / 2) bytes; using
+ * Y * Pitch / 2 would address chroma row Y and, for Y != 0, write to the
+ * wrong rows or past the plane.
+ *
+ * No bounds checking is done here; QT_ConvertFrame() validates the size
+ * before dispatching.
+ */
+void QT_CSConvert_YV12(
+    const QT_Output_t* output,
+    QT_Frame_t* frame)
+{
+    UINT8* pb0 = (UINT8*)(frame->Frame);
+    UINT8* pb1 = pb0 + frame->Rasters * frame->Pitch;
+    UINT8* pb2 = pb1 + frame->Rasters * frame->Pitch / 4;
+    UINT8* end;
+
+    /* Start at the last source row of each plane; the loops walk upwards. */
+    const UINT8* s0 = output->Plane[0] + (output->CY - 1) * output->CX;
+    const UINT8* s1 = output->Plane[2] + (output->CY / 2 - 1) * output->CX / 2;
+    const UINT8* s2 = output->Plane[1] + (output->CY / 2 - 1) * output->CX / 2;
+
+    /* Byte offset of the target position inside a half-resolution chroma plane. */
+    INT32 chroma_offset = (frame->Y / 2) * (frame->Pitch / 2) + frame->X / 2;
+
+    pb0 += frame->Y * frame->Pitch + frame->X;
+    pb1 += chroma_offset;
+    pb2 += chroma_offset;
+
+    /* Luma plane. */
+    end = pb0 + output->CY * frame->Pitch;
+    while (pb0 < end) {
+        memcpy(pb0, s0, output->CX);
+        pb0 += frame->Pitch;
+        s0 -= output->CX;
+    }
+
+    /* First chroma plane (from Plane[2]). */
+    end = pb1 + (output->CY / 2) * (frame->Pitch / 2);
+    while (pb1 < end) {
+        memcpy(pb1, s1, output->CX / 2);
+        pb1 += frame->Pitch / 2;
+        s1 -= output->CX / 2;
+    }
+
+    /* Second chroma plane (from Plane[1]). */
+    end = pb2 + (output->CY / 2) * (frame->Pitch / 2);
+    while (pb2 < end) {
+        memcpy(pb2, s2, output->CX / 2);
+        pb2 += frame->Pitch / 2;
+        s2 -= output->CX / 2;
+    }
+}
+/*
+ * QT_CSConvert_YUY2
+ *
+ * Placeholder for the YUY2 conversion: the body is empty, so neither
+ * `output` nor `frame` is touched.
+ * NOTE(review): QT_SetupCSC() still accepts QTCS_YUY2 and selects this
+ * routine, so a YUY2 conversion currently succeeds without writing anything.
+ */
+void QT_CSConvert_YUY2(
+ const QT_Output_t* output,
+ QT_Frame_t* frame)
+{
+}
+
+/* */
+
--- /dev/null
+/* CSConverter.h */
+/* 2009/06/19 */
+
+#pragma once
+/* Colour-space conversion routines. Each one takes the decoder output and a
+ * destination frame descriptor. This header does not declare QT_Output_t or
+ * QT_Frame_t itself; both .c files include "TheoraDecoder.h" before it. */
+void QT_CSConvert_YV12(
+ const QT_Output_t* output,
+ QT_Frame_t* frame);
+/* The YUY2 routine currently has an empty body in CSConverter.c. */
+void QT_CSConvert_YUY2(
+ const QT_Output_t* output,
+ QT_Frame_t* frame);
+
+/* */
+/* SSE2 variants, defined in CSConverter_SSE2.c. */
+void QT_CSConvert_YV12_SSE2(
+ const QT_Output_t* output,
+ QT_Frame_t* frame);
+/* The SSE2 YUY2 routine currently has an empty body as well. */
+void QT_CSConvert_YUY2_SSE2(
+ const QT_Output_t* output,
+ QT_Frame_t* frame);
+
--- /dev/null
+/* CSConverter_SSE2.c */
+/* 2009/06/19 */
+
+#include "StdAfx.h"
+
+#include "TheoraDecoder.h"
+
+#include "CSConverter.h"
+
+/* */
+
+#pragma warning(disable : 4799)
+
+/* */
+
+/*
+ * CopyCSC_16
+ *
+ * Copies exactly `cx` bytes from `s` to `d`, 16 bytes at a time with SSE2 and
+ * byte by byte for any remainder.
+ *
+ * Unaligned load/store intrinsics are used because neither pointer is
+ * guaranteed to be 16-byte aligned: the destination is offset by the caller's
+ * X position and advances by the frame pitch (or half of it for chroma).
+ * The aligned forms fault on a misaligned address.
+ *
+ * The remainder loop keeps the copy from running past `cx` when the width is
+ * not a multiple of 16.
+ */
+static __inline void CopyCSC_16(
+    UINT8* d,
+    const UINT8* s,
+    INT32 cx)
+{
+    UINT8* p = d;
+    const UINT8* q = s;
+    INT32 n = cx;
+
+    for (; n >= 16; n -= 16, p += 16, q += 16) {
+        _mm_storeu_si128((__m128i*)p, _mm_loadu_si128((const __m128i*)q));
+    }
+
+    for (; n > 0; n--) {
+        *p++ = *q++;
+    }
+}
+
+/*
+ * CopyCSC_8
+ *
+ * Copies exactly `cx` bytes from `s` to `d`, 8 bytes at a time through an
+ * __m64 assignment and byte by byte for any remainder, so the copy never
+ * runs past `cx` when the width is not a multiple of 8.
+ *
+ * The __m64 copy uses the MMX register file; the caller is expected to issue
+ * _mm_empty() once it has finished (QT_CSConvert_YV12_SSE2 does so).
+ */
+static __inline void CopyCSC_8(
+    UINT8* d,
+    const UINT8* s,
+    INT32 cx)
+{
+    UINT8* p = d;
+    const UINT8* q = s;
+    INT32 n = cx;
+
+    for (; n >= 8; n -= 8, p += 8, q += 8) {
+        *((__m64*)p) = *((const __m64*)q);
+    }
+
+    for (; n > 0; n--) {
+        *p++ = *q++;
+    }
+}
+
+/* */
+
+/*
+ * QT_CSConvert_YV12_SSE2
+ *
+ * SSE2 counterpart of QT_CSConvert_YV12: copies the three decoded planes in
+ * `output` into the YV12 destination described by `frame`.
+ *
+ * Destination layout derived from `frame`: a luma plane of Rasters rows of
+ * Pitch bytes, followed by two chroma planes of Rasters * Pitch / 4 bytes
+ * each, written with a row pitch of Pitch / 2. output->Plane[2] goes to the
+ * first chroma plane and output->Plane[1] to the second. Source rows are read
+ * from the last row upwards.
+ *
+ * Luma rows always go through CopyCSC_16. Chroma rows use CopyCSC_16 when the
+ * chroma width is a multiple of 16 and CopyCSC_8 otherwise.
+ *
+ * The chroma planes have half the vertical resolution, so the row offset of
+ * the target position is (Y / 2) rows of (Pitch / 2) bytes; using
+ * Y * Pitch / 2 would address chroma row Y and, for Y != 0, write to the
+ * wrong rows or past the plane.
+ *
+ * _mm_empty() is issued at the end because CopyCSC_8 copies through __m64.
+ */
+void QT_CSConvert_YV12_SSE2(
+    const QT_Output_t* output,
+    QT_Frame_t* frame)
+{
+    UINT8* pb0 = (UINT8*)(frame->Frame);
+    UINT8* pb1 = pb0 + frame->Rasters * frame->Pitch;
+    UINT8* pb2 = pb1 + frame->Rasters * frame->Pitch / 4;
+    UINT8* end;
+
+    /* Start at the last source row of each plane; the loops walk upwards. */
+    const UINT8* s0 = output->Plane[0] + (output->CY - 1) * output->CX;
+    const UINT8* s1 = output->Plane[2] + (output->CY / 2 - 1) * output->CX / 2;
+    const UINT8* s2 = output->Plane[1] + (output->CY / 2 - 1) * output->CX / 2;
+
+    INT32 cx2 = output->CX / 2;
+
+    /* Byte offset of the target position inside a half-resolution chroma plane. */
+    INT32 chroma_offset = (frame->Y / 2) * (frame->Pitch / 2) + frame->X / 2;
+
+    pb0 += frame->Y * frame->Pitch + frame->X;
+    pb1 += chroma_offset;
+    pb2 += chroma_offset;
+
+    /* Luma plane. */
+    end = pb0 + output->CY * frame->Pitch;
+    while (pb0 < end) {
+        CopyCSC_16(pb0, s0, output->CX);
+        pb0 += frame->Pitch;
+        s0 -= output->CX;
+    }
+
+    if ((cx2 & 0xf) == 0) {
+        /* Chroma width is a multiple of 16: use the 16-byte copy. */
+        end = pb1 + (output->CY / 2) * (frame->Pitch / 2);
+        while (pb1 < end) {
+            CopyCSC_16(pb1, s1, cx2);
+            pb1 += frame->Pitch / 2;
+            s1 -= cx2;
+        }
+
+        end = pb2 + (output->CY / 2) * (frame->Pitch / 2);
+        while (pb2 < end) {
+            CopyCSC_16(pb2, s2, cx2);
+            pb2 += frame->Pitch / 2;
+            s2 -= cx2;
+        }
+
+    } else {
+        /* Otherwise fall back to the 8-byte copy. */
+        end = pb1 + (output->CY / 2) * (frame->Pitch / 2);
+        while (pb1 < end) {
+            CopyCSC_8(pb1, s1, cx2);
+            pb1 += frame->Pitch / 2;
+            s1 -= cx2;
+        }
+
+        end = pb2 + (output->CY / 2) * (frame->Pitch / 2);
+        while (pb2 < end) {
+            CopyCSC_8(pb2, s2, cx2);
+            pb2 += frame->Pitch / 2;
+            s2 -= cx2;
+        }
+    }
+
+    _mm_empty();
+}
+/*
+ * QT_CSConvert_YUY2_SSE2
+ *
+ * Placeholder for the SSE2 YUY2 conversion: the body is empty, so neither
+ * `output` nor `frame` is touched.
+ * NOTE(review): QT_SetupCSC() selects this routine for QTCS_YUY2 when SSE2 is
+ * enabled, so such a conversion currently succeeds without writing anything.
+ */
+void QT_CSConvert_YUY2_SSE2(
+ const QT_Output_t* output,
+ QT_Frame_t* frame)
+{
+}
+
+/* */
+
>
</File>
<File
+ RelativePath=".\CSConverter.c"
+ >
+ </File>
+ <File
+ RelativePath=".\CSConverter_SSE2.c"
+ >
+ <FileConfiguration
+ Name="Debug|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AssemblerOutput="2"
+ />
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32"
+ >
+ <Tool
+ Name="VCCLCompilerTool"
+ AssemblerOutput="2"
+ />
+ </FileConfiguration>
+ </File>
+ <File
RelativePath=".\Dequantize.c"
>
</File>
>
</File>
<File
+ RelativePath=".\CSConverter.h"
+ >
+ </File>
+ <File
RelativePath=".\Dequantize.h"
>
</File>
#include "FrameDecoder.h"
+#include "CSConverter.h"
+
/* */
BOOL g_QT_Enable_SSE2 = FALSE;
return TRUE;
}
+/* */
+/* Colour-space converter state: holds the conversion routine chosen by
+ * QT_SetupCSC(). */
+struct QTheoraCSC {
+ /* Selected conversion routine; zero-filled by QT_CreateCSC() until
+  * QT_SetupCSC() assigns one. Invoked by QT_ConvertFrame(). */
+ VOID (*Convert)(const QT_Output_t*, QT_Frame_t*);
+
+}; /* QTheoraCSC */
+
+/* */
+
+/*
+ * QT_CreateCSC
+ *
+ * Allocates a colour-space converter with no conversion routine selected.
+ *
+ * Returns the new converter, or NULL when the allocation fails. The caller
+ * owns the result and releases it with QT_ReleaseCSC().
+ */
+QTheoraCSC_t* QT_CreateCSC(void)
+{
+    /* calloc replaces the malloc + memset pair. */
+    QTheoraCSC_t* t = (QTheoraCSC_t*)calloc(1, sizeof(*t));
+    if (t == NULL) {
+        return NULL;
+    }
+
+    /* Set the function pointer explicitly instead of relying on an
+       all-bits-zero pattern being a null function pointer. */
+    t->Convert = NULL;
+
+    return t;
+}
+
+/*
+ * QT_ReleaseCSC
+ *
+ * Frees a converter returned by QT_CreateCSC(). Passing NULL is allowed and
+ * does nothing; free() already ignores a null pointer, so no separate guard
+ * is needed.
+ */
+void QT_ReleaseCSC(QTheoraCSC_t* t)
+{
+    free(t);
+}
+
+/*
+ * QT_SetupCSC
+ *
+ * Selects the conversion routine for colour space `cs` (QTCS_YV12 or
+ * QTCS_YUY2), choosing the SSE2 variant when g_QT_Enable_SSE2 is set.
+ *
+ * Returns TRUE when a routine was selected. Returns FALSE, leaving the
+ * converter unchanged, when `t` is NULL or `cs` is not a known colour space.
+ * The NULL check matters because QT_CreateCSC() can return NULL.
+ *
+ * NOTE(review): the YUY2 routines are currently empty stubs, yet QTCS_YUY2 is
+ * still reported as a success here.
+ */
+BOOL QT_SetupCSC(
+    QTheoraCSC_t* t,
+    INT32 cs)
+{
+    extern BOOL g_QT_Enable_SSE2;
+
+    if (t == NULL) {
+        return FALSE;
+    }
+
+    switch (cs) {
+    case QTCS_YV12:
+        if (g_QT_Enable_SSE2) {
+            t->Convert = QT_CSConvert_YV12_SSE2;
+        } else {
+            t->Convert = QT_CSConvert_YV12;
+        }
+        break;
+
+    case QTCS_YUY2:
+        if (g_QT_Enable_SSE2) {
+            t->Convert = QT_CSConvert_YUY2_SSE2;
+        } else {
+            t->Convert = QT_CSConvert_YUY2;
+        }
+        break;
+
+    default:
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/*
+ * QT_ConvertFrame
+ *
+ * Converts the decoded picture `output` into the destination described by
+ * `frame`, using the routine selected by QT_SetupCSC().
+ *
+ * Returns TRUE after the conversion routine has run. Returns FALSE without
+ * writing anything when:
+ *   - `t`, `output` or `frame` is NULL,
+ *   - no routine has been selected yet (QT_SetupCSC() was never called or
+ *     failed, so Convert is still NULL),
+ *   - the decoded picture is wider or taller than the destination rectangle
+ *     (frame->CX by frame->CY).
+ */
+BOOL QT_ConvertFrame(
+    QTheoraCSC_t* t,
+    const QT_Output_t* output,
+    QT_Frame_t* frame)
+{
+    /* Guard against a missing converter or an unselected routine: calling
+       through a NULL function pointer would crash. */
+    if (t == NULL || t->Convert == NULL) {
+        return FALSE;
+    }
+
+    if (output == NULL || frame == NULL) {
+        return FALSE;
+    }
+
+    if (output->CX > frame->CX ||
+        output->CY > frame->CY) {
+        return FALSE;
+    }
+
+    t->Convert(output, frame);
+
+    return TRUE;
+}
+
+/* */
+
/* */
+#define QTCS_YV12 0 /* colour-space id accepted by QT_SetupCSC(): YV12 */
+#define QTCS_YUY2 1 /* colour-space id accepted by QT_SetupCSC(): YUY2 */
+
+struct QTheoraCSC;
+/* Opaque converter handle; the structure is only forward-declared here. */
+typedef struct QTheoraCSC QTheoraCSC_t;
+/* Allocates a converter; returns NULL when the allocation fails. */
+QTheoraCSC_t* QT_CreateCSC(void);
+/* Frees a converter; a NULL pointer is accepted. */
+void QT_ReleaseCSC(QTheoraCSC_t* t);
+/* Selects the conversion routine for colour space `cs` (one of the QTCS_
+ * values); returns FALSE for an unknown value. */
+BOOL QT_SetupCSC(
+ QTheoraCSC_t* t,
+ INT32 cs);
+/* Destination description handed to QT_ConvertFrame(). */
+struct QT_Frame {
+ /* Frame: base address of the destination buffer.
+  * Pitch: byte distance between consecutive luma rows.
+  * Rasters: luma row count; the YV12 routines place the chroma planes
+  * after Rasters * Pitch bytes. */
+ VOID* Frame;
+ INT32 Pitch;
+ INT32 Rasters;
+ /* Position inside the destination at which the picture is written. */
+ INT32 X;
+ INT32 Y;
+ /* Size of the target rectangle; QT_ConvertFrame() rejects a decoded
+  * picture that is wider or taller than this. */
+ INT32 CX;
+ INT32 CY;
+
+};
+
+typedef struct QT_Frame QT_Frame_t;
+/* Runs the routine selected by QT_SetupCSC() to write `output` into `frame`.
+ * Returns FALSE, without converting, when `output` is larger than the
+ * rectangle given by frame->CX and frame->CY; TRUE otherwise. */
+BOOL QT_ConvertFrame(
+ QTheoraCSC_t* t,
+ const QT_Output_t* output,
+ QT_Frame_t* frame);
+
+/* */
+
#ifdef __cplusplus
}
#endif /* __cplusplus */