diff --git a/components/display/core/gds.h b/components/display/core/gds.h index bfbc29e4..29f3e8c7 100644 --- a/components/display/core/gds.h +++ b/components/display/core/gds.h @@ -5,11 +5,11 @@ #include /* NOTE for drivers: - The build-in DrawPixel(Fast), DrawCBR and ClearWindow are optimized for 1 bit - and 4 bits screen depth. For any other type of screen, DrawCBR and ClearWindow - default to use DrawPixel, which is very sub-optimal. For such other depth, you - must supply the DrawPixelFast. The built-in 1 bit depth function are only for - screen with vertical framing (1 byte = 8 lines). For example SSD1326 in + The built-in DrawPixel(Fast), DrawCBR and ClearWindow are optimized for 1 bit + and 4 bits grayscale screen depth and 8, 16 and 24 bits color. For any other type of screen, + DrawCBR and ClearWindow default to use DrawPixel, which is very sub-optimal. For + other depths, you must supply the DrawPixelFast. The built-in 1 bit depth functions + are only for screens with vertical framing (1 byte = 8 lines). 
For example SSD1326 in monochrome mode is not such type of screen, SH1106 and SSD1306 are */ diff --git a/components/display/core/gds_image.c b/components/display/core/gds_image.c index 467cca8c..1707c3d6 100644 --- a/components/display/core/gds_image.c +++ b/components/display/core/gds_image.c @@ -42,31 +42,31 @@ typedef struct { * monochrome (0.2125 * color.r) + (0.7154 * color.g) + (0.0721 * color.b) * grayscale (0.3 * R) + (0.59 * G) + (0.11 * B) ) */ -inline int Scaler332(uint8_t *Pixels) { +static inline int Scaler332(uint8_t *Pixels) { return (Pixels[2] & ~0x1f) | ((Pixels[1] & ~0x1f) >> 3) | (Pixels[0] >> 6); } -inline int Scaler444(uint8_t *Pixels) { +static inline int Scaler444(uint8_t *Pixels) { return ((Pixels[2] & ~0x0f) << 4) | (Pixels[1] & ~0x0f) | (Pixels[0] >> 4); } -inline int Scaler555(uint8_t *Pixels) { +static inline int Scaler555(uint8_t *Pixels) { return ((Pixels[2] & ~0x07) << 7) | ((Pixels[1] & ~0x07) << 2) | (Pixels[0] >> 3); } -inline int Scaler565(uint8_t *Pixels) { +static inline int Scaler565(uint8_t *Pixels) { return ((Pixels[2] & ~0x07) << 8) | ((Pixels[1] & ~0x03) << 3) | (Pixels[0] >> 3); } -inline int Scaler666(uint8_t *Pixels) { +static inline int Scaler666(uint8_t *Pixels) { return ((Pixels[2] & ~0x03) << 10) | ((Pixels[1] & ~0x03) << 4) | (Pixels[0] >> 2); } -inline int Scaler888(uint8_t *Pixels) { +static inline int Scaler888(uint8_t *Pixels) { return (Pixels[2] << 16) | (Pixels[1] << 8) | Pixels[0]; } -inline int ScalerGray(uint8_t *Pixels) { +static inline int ScalerGray(uint8_t *Pixels) { return (Pixels[2] * 14 + Pixels[1] * 76 + Pixels[0] * 38) >> 7; } @@ -236,37 +236,37 @@ void GDS_GetJPEGSize(uint8_t *Source, int *Width, int *Height) { * grayscale (0.3 * R) + (0.59 * G) + (0.11 * B) ) */ -inline int ToGray888(uint8_t **Pixel) { +static inline int ToGray888(uint8_t **Pixel) { uint32_t v = *(*Pixel)++; v |= *(*Pixel)++ << 8; v |= *(*Pixel)++ << 16; return (((v & 0xff) * 14) + ((v >> 8) & 0xff) * 76 + ((v >> 16) * 38) + 1) 
>> 7; } -inline int ToGray666(uint8_t **Pixel) { +static inline int ToGray666(uint8_t **Pixel) { uint32_t v = *(*Pixel)++; v |= *(*Pixel)++ << 8; v |= *(*Pixel)++ << 16; return (((v & 0x3f) * 14) + ((v >> 6) & 0x3f) * 76 + ((v >> 12) * 38) + 1) >> 7; } -inline int ToGray565(uint16_t **Pixel) { +static inline int ToGray565(uint16_t **Pixel) { uint16_t v = *(*Pixel)++; return ((((v & 0x1f) * 14) << 1) + ((v >> 5) & 0x3f) * 76 + (((v >> 11) * 38) << 1) + 1) >> 7; } -inline int ToGray555(uint16_t **Pixel) { +static inline int ToGray555(uint16_t **Pixel) { uint16_t v = *(*Pixel)++; return ((v & 0x1f) * 14 + ((v >> 5) & 0x1f) * 76 + (v >> 10) * 38) >> 7; } -inline int ToGray444(uint16_t **Pixel) { +static inline int ToGray444(uint16_t **Pixel) { uint16_t v = *(*Pixel)++; return ((v & 0x0f) * 14 + ((v >> 4) & 0x0f) * 76 + (v >> 8) * 38) >> 7; } -inline int ToGray332(uint8_t **Pixel) { +static inline int ToGray332(uint8_t **Pixel) { uint8_t v = *(*Pixel)++; return ((((v & 0x3) * 14) << 1) + ((v >> 2) & 0x7) * 76 + (v >> 5) * 38 + 1) >> 7; } -inline int ToSelf(uint8_t **Pixel) { +static inline int ToSelf(uint8_t **Pixel) { return *(*Pixel)++; } diff --git a/components/squeezelite/opus.c b/components/squeezelite/opus.c index f20ebe42..9a3d2286 100644 --- a/components/squeezelite/opus.c +++ b/components/squeezelite/opus.c @@ -30,7 +30,9 @@ * thread has a higher priority. 
Using an interim buffer where opus decoder writes the output is not great from * an efficiency (one extra memory copy) point of view, but it allows the lock to not be kept for too long */ +#if EMBEDDED #define FRAME_BUF 2048 +#endif #if BYTES_PER_FRAME == 4 #define ALIGN(n) (n) @@ -151,16 +153,14 @@ static decode_state opus_decompress(void) { LOG_INFO("setting track_start"); } -#if !FRAME_BUF - LOCK_O_direct; -#endif - #if FRAME_BUF IF_DIRECT( frames = min(_buf_space(outputbuf), _buf_cont_write(outputbuf)) / BYTES_PER_FRAME; + frames = min(frames, FRAME_BUF); write_buf = u->write_buf; ); #else + LOCK_O_direct; IF_DIRECT( frames = min(_buf_space(outputbuf), _buf_cont_write(outputbuf)) / BYTES_PER_FRAME; write_buf = outputbuf->writep; @@ -171,10 +171,7 @@ static decode_state opus_decompress(void) { write_buf = process.inbuf; ); -#if FRAME_BUF - frames = min(frames, FRAME_BUF); -#endif - + //printf("processing %u frames\n", frames); // write the decoded frames into outputbuf then unpack them (they are 16 bits) n = OP(u, read, u->of, (opus_int16*) write_buf, frames * channels, NULL); @@ -190,15 +187,21 @@ static decode_state opus_decompress(void) { frames = n; count = frames * channels; - iptr = (s16_t *)write_buf + count; - optr = (ISAMPLE_T *) outputbuf->writep + frames * 2; - + // work backward to unpack samples (if needed) + iptr = (s16_t *) write_buf + count; + optr = (ISAMPLE_T *) write_buf + frames * 2; + if (channels == 2) { #if BYTES_PER_FRAME == 4 - memcpy(outputbuf->writep, write_buf, frames * BYTES_PER_FRAME); +#if FRAME_BUF + // copy needed only when DIRECT and FRAME_BUF + IF_DIRECT( + memcpy(outputbuf->writep, write_buf, frames * BYTES_PER_FRAME); + ) +#endif #else while (count--) { - *--optr = *--iptr << 16; + *--optr = ALIGN(*--iptr); } #endif } else if (channels == 1) { @@ -298,8 +301,8 @@ struct codec *register_opus(void) { static struct codec ret = { 'u', // id "ops", // types - 4096, // min read - 20480, // min space + 4*1024, // min read + 32*1024, 
// min space opus_open, // open opus_close, // close opus_decompress, // decode @@ -311,7 +314,9 @@ struct codec *register_opus(void) { } u->of = NULL; +#if FRAME_BUF u->write_buf = NULL; +#endif if (!load_opus()) { return NULL; diff --git a/components/squeezelite/squeezelite.h b/components/squeezelite/squeezelite.h index fe75b94b..2831a80f 100644 --- a/components/squeezelite/squeezelite.h +++ b/components/squeezelite/squeezelite.h @@ -387,9 +387,6 @@ typedef BOOL bool; #endif -typedef u32_t frames_t; -typedef int sockfd; - // logging typedef enum { lERROR = 0, lWARN, lINFO, lDEBUG, lSDEBUG } log_level; @@ -401,7 +398,10 @@ void logprint(const char *fmt, ...); #define LOG_INFO(fmt, ...) if (loglevel >= lINFO) logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, __LINE__, ##__VA_ARGS__) #define LOG_DEBUG(fmt, ...) if (loglevel >= lDEBUG) logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, __LINE__, ##__VA_ARGS__) #define LOG_SDEBUG(fmt, ...) if (loglevel >= lSDEBUG) logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, __LINE__, ##__VA_ARGS__) - + +typedef uint32_t frames_t; +typedef int sockfd; + #if EMBEDDED #include "embedded.h" #endif diff --git a/components/squeezelite/vorbis.c b/components/squeezelite/vorbis.c index 8baa12e1..6ade8b84 100644 --- a/components/squeezelite/vorbis.c +++ b/components/squeezelite/vorbis.c @@ -29,7 +29,9 @@ * thread has a higher priority. 
Using an interim buffer where vorbis decoder writes the output is not great from * an efficiency (one extra memory copy) point of view, but it allows the lock to not be kept for too long */ +#if EMBEDDED #define FRAME_BUF 2048 +#endif #if BYTES_PER_FRAME == 4 #define ALIGN(n) (n) @@ -183,16 +185,14 @@ static decode_state vorbis_decode(void) { } } -#if !FRAME_BUF - LOCK_O_direct; -#endif - #if FRAME_BUF IF_DIRECT( frames = min(_buf_space(outputbuf), _buf_cont_write(outputbuf)) / BYTES_PER_FRAME; + frames = min(frames, FRAME_BUF); write_buf = v->write_buf; ); #else + LOCK_O_direct; IF_DIRECT( frames = min(_buf_space(outputbuf), _buf_cont_write(outputbuf)) / BYTES_PER_FRAME; write_buf = outputbuf->writep; @@ -203,9 +203,6 @@ static decode_state vorbis_decode(void) { write_buf = process.inbuf; ); -#if FRAME_BUF - frames = min(frames, FRAME_BUF); -#endif bytes = frames * 2 * channels; // samples returned are 16 bits // write the decoded frames into outputbuf even though they are 16 bits per sample, then unpack them @@ -237,15 +234,21 @@ static decode_state vorbis_decode(void) { frames = n / 2 / channels; count = frames * channels; - iptr = (s16_t *)write_buf + count; - optr = (ISAMPLE_T *) outputbuf->writep + frames * 2; + // work backward to unpack samples (if needed) + iptr = (s16_t *) write_buf + count; + optr = (ISAMPLE_T *) write_buf + frames * 2; if (channels == 2) { #if BYTES_PER_FRAME == 4 - memcpy(outputbuf->writep, write_buf, frames * BYTES_PER_FRAME); +#if FRAME_BUF + // copy needed only when DIRECT and FRAME_BUF + IF_DIRECT( + memcpy(outputbuf->writep, write_buf, frames * BYTES_PER_FRAME); + ) +#endif #else while (count--) { - *--optr = *--iptr << 16; + *--optr = ALIGN(*--iptr); } #endif } else if (channels == 1) {