LCOV - code coverage report
Current view: top level - src - ldpc.c (source / functions) Coverage Total Hit
Test: poporon Coverage Report Lines: 87.4 % 634 554
Test Date: 2026-03-07 15:16:43 Functions: 100.0 % 32 32
Legend: Lines: hit not hit

            Line data    Source code
       1              : /*
       2              :  * libpoporon - ldpc.c
       3              :  *
       4              :  * This file is part of libpoporon.
       5              :  *
       6              :  * Author: Go Kudo <zeriyoshi@gmail.com>
       7              :  * SPDX-License-Identifier: BSD-3-Clause
       8              :  */
       9              : 
      10              : #include "internal/ldpc.h"
      11              : 
      12              : #if POPORON_USE_SIMD
      13              : #include "internal/simd.h"
      14              : #endif
      15              : 
      16              : #include <poporon/rng.h>
      17              : 
      18              : #define MIN_BLOCK_SIZE 32
      19              : #define MAX_BLOCK_SIZE 8192
      20              : 
      21              : #define DEFAULT_COL_WEIGHT         3
      22              : #define BURST_RESISTANT_COL_WEIGHT 6
      23              : 
      24              : #define MIN_COL_WEIGHT 3
      25              : #define MAX_COL_WEIGHT 8
      26              : 
      27              : #define DEFAULT_MAX_ITERATIONS 50
      28              : 
      29              : #define BURST_RESISTANT_MAX_ITERATIONS 100
      30              : 
      31              : #define LLR_SCALE_FACTOR 256
      32              : 
      33              : #define AUTO_INTERLEAVE_DEPTH_DIVISOR 4
      34              : 
      35              : #define AUTO_LIFTING_FACTOR_DIVISOR 8
      36              : #define MIN_LIFTING_FACTOR          4
      37              : #define MAX_LIFTING_FACTOR          256
      38              : 
      39              : #define MINSUM_ALPHA_NUMERATOR   15
      40              : #define MINSUM_ALPHA_DENOMINATOR 16
      41              : 
      42           44 : static inline void get_rate_params(poporon_ldpc_rate_t rate, uint32_t *info_num, uint32_t *parity_num)
      43              : {
      44           44 :     switch (rate) {
      45            4 :     case PPRN_LDPC_RATE_1_3:
      46            4 :         *info_num = 1;
      47            4 :         *parity_num = 2;
      48            4 :         break;
      49           30 :     case PPRN_LDPC_RATE_1_2:
      50           30 :         *info_num = 1;
      51           30 :         *parity_num = 1;
      52           30 :         break;
      53            4 :     case PPRN_LDPC_RATE_2_3:
      54            4 :         *info_num = 2;
      55            4 :         *parity_num = 1;
      56            4 :         break;
      57            3 :     case PPRN_LDPC_RATE_3_4:
      58            3 :         *info_num = 3;
      59            3 :         *parity_num = 1;
      60            3 :         break;
      61            0 :     case PPRN_LDPC_RATE_4_5:
      62            0 :         *info_num = 4;
      63            0 :         *parity_num = 1;
      64            0 :         break;
      65            3 :     case PPRN_LDPC_RATE_5_6:
      66            3 :         *info_num = 5;
      67            3 :         *parity_num = 1;
      68            3 :         break;
      69            0 :     default:
      70            0 :         *info_num = 1;
      71            0 :         *parity_num = 1;
      72              :     }
      73           44 : }
      74              : 
      75       721076 : static inline uint8_t get_bit(const uint8_t *data, size_t bit_idx)
      76              : {
      77       721076 :     return (data[bit_idx / 8] >> (7 - (bit_idx % 8))) & 1;
      78              : }
      79              : 
      80        76354 : static inline void set_bit(uint8_t *data, size_t bit_idx, uint8_t value)
      81              : {
      82        76354 :     size_t byte_idx = bit_idx / 8;
      83        76354 :     uint8_t bit_mask = 1 << (7 - (bit_idx % 8));
      84              : 
      85        76354 :     if (value) {
      86        38453 :         data[byte_idx] |= bit_mask;
      87              :     } else {
      88        37901 :         data[byte_idx] &= ~bit_mask;
      89              :     }
      90        76354 : }
      91              : 
      92           12 : static inline void interleave_bits(const poporon_ldpc_t *ldpc, const uint8_t *input, uint8_t *output)
      93              : {
      94              :     uint8_t bit;
      95              :     size_t i;
      96              : 
      97           12 :     if (!ldpc->interleaver.forward) {
      98            0 :         pmemcpy(output, input, ldpc->codeword_bytes);
      99            0 :         return;
     100              :     }
     101              : 
     102           12 :     pmemset(output, 0, ldpc->codeword_bytes);
     103        23052 :     for (i = 0; i < ldpc->codeword_bits; i++) {
     104        23040 :         bit = get_bit(input, i);
     105        23040 :         set_bit(output, ldpc->interleaver.forward[i], bit);
     106              :     }
     107              : }
     108              : 
     109           10 : static inline void deinterleave_bits(const poporon_ldpc_t *ldpc, const uint8_t *input, uint8_t *output)
     110              : {
     111              :     uint8_t bit;
     112              :     size_t i;
     113              : 
     114           10 :     if (!ldpc->interleaver.inverse) {
     115            0 :         pmemcpy(output, input, ldpc->codeword_bytes);
     116            0 :         return;
     117              :     }
     118              : 
     119           10 :     pmemset(output, 0, ldpc->codeword_bytes);
     120        18954 :     for (i = 0; i < ldpc->codeword_bits; i++) {
     121        18944 :         bit = get_bit(input, i);
     122        18944 :         set_bit(output, ldpc->interleaver.inverse[i], bit);
     123              :     }
     124              : }
     125              : 
     126              : static inline void interleave_llr(const poporon_ldpc_t *ldpc, const int8_t *input, int8_t *output)
     127              : {
     128              :     size_t i;
     129              : 
     130              :     if (!ldpc->interleaver.forward) {
     131              :         pmemcpy(output, input, ldpc->codeword_bits);
     132              :         return;
     133              :     }
     134              : 
     135              :     for (i = 0; i < ldpc->codeword_bits; i++) {
     136              :         output[ldpc->interleaver.forward[i]] = input[i];
     137              :     }
     138              : }
     139              : 
     140            2 : static inline void deinterleave_llr(const poporon_ldpc_t *ldpc, const int8_t *input, int8_t *output)
     141              : {
     142              :     size_t i;
     143              : 
     144            2 :     if (!ldpc->interleaver.inverse) {
     145            0 :         pmemcpy(output, input, ldpc->codeword_bits);
     146            0 :         return;
     147              :     }
     148              : 
     149         4098 :     for (i = 0; i < ldpc->codeword_bits; i++) {
     150         4096 :         output[ldpc->interleaver.inverse[i]] = input[i];
     151              :     }
     152              : }
     153              : 
     154           44 : static inline bool build_interleaver(poporon_ldpc_t *ldpc)
     155              : {
     156              :     poporon_rng_t *rng;
     157              :     uint32_t depth, width, temp, seed, rval, *col_perm;
     158              :     size_t i, j, row, col, interleaved_pos;
     159              : 
     160           44 :     if (!ldpc->config.use_inner_interleave) {
     161           31 :         ldpc->interleaver.forward = NULL;
     162           31 :         ldpc->interleaver.inverse = NULL;
     163           31 :         ldpc->interleaver.size = 0;
     164           31 :         ldpc->interleaver.depth = 0;
     165           31 :         return true;
     166              :     }
     167              : 
     168           13 :     depth = ldpc->config.interleave_depth;
     169           13 :     if (depth == 0) {
     170           13 :         depth = (uint32_t)(ldpc->codeword_bits / AUTO_INTERLEAVE_DEPTH_DIVISOR);
     171           13 :         if (depth < 8) {
     172            0 :             depth = 8;
     173              :         }
     174           13 :         if (depth > 256) {
     175           11 :             depth = 256;
     176              :         }
     177              :     }
     178              : 
     179           13 :     width = (uint32_t)((ldpc->codeword_bits + depth - 1) / depth);
     180              : 
     181           13 :     ldpc->interleaver.size = ldpc->codeword_bits;
     182           13 :     ldpc->interleaver.depth = depth;
     183              : 
     184           13 :     ldpc->interleaver.forward = (uint32_t *)pmalloc(ldpc->codeword_bits * sizeof(uint32_t));
     185           13 :     ldpc->interleaver.inverse = (uint32_t *)pmalloc(ldpc->codeword_bits * sizeof(uint32_t));
     186              : 
     187           13 :     if (!ldpc->interleaver.forward || !ldpc->interleaver.inverse) {
     188            0 :         return false;
     189              :     }
     190              : 
     191           13 :     col_perm = (uint32_t *)pmalloc(width * sizeof(uint32_t));
     192           13 :     if (!col_perm) {
     193            0 :         return false;
     194              :     }
     195              : 
     196          111 :     for (i = 0; i < width; i++) {
     197           98 :         col_perm[i] = (uint32_t)i;
     198              :     }
     199              : 
     200           13 :     seed = (uint32_t)(ldpc->config.seed ^ (uint64_t)ldpc->codeword_bits);
     201           13 :     rng = poporon_rng_create(XOSHIRO128PP, &seed, sizeof(seed));
     202           13 :     if (!rng) {
     203            0 :         pfree(col_perm);
     204            0 :         return false;
     205              :     }
     206              : 
     207           98 :     for (i = width - 1; i > 0; i--) {
     208           85 :         poporon_rng_next(rng, &rval, sizeof(rval));
     209           85 :         j = rval % (i + 1);
     210           85 :         temp = col_perm[i];
     211           85 :         col_perm[i] = col_perm[j];
     212           85 :         col_perm[j] = temp;
     213              :     }
     214           13 :     poporon_rng_destroy(rng);
     215              : 
     216        25101 :     for (i = 0; i < ldpc->codeword_bits; i++) {
     217        25088 :         row = i / width;
     218        25088 :         col = i % width;
     219              : 
     220        25088 :         if (row < depth) {
     221        25088 :             interleaved_pos = col_perm[col] * depth + row;
     222        25088 :             if (interleaved_pos < ldpc->codeword_bits) {
     223        25088 :                 ldpc->interleaver.forward[i] = (uint32_t)interleaved_pos;
     224              :             } else {
     225            0 :                 ldpc->interleaver.forward[i] = (uint32_t)i;
     226              :             }
     227              :         } else {
     228            0 :             ldpc->interleaver.forward[i] = (uint32_t)i;
     229              :         }
     230              :     }
     231              : 
     232        25101 :     for (i = 0; i < ldpc->codeword_bits; i++) {
     233        25088 :         ldpc->interleaver.inverse[ldpc->interleaver.forward[i]] = (uint32_t)i;
     234              :     }
     235              : 
     236           13 :     pfree(col_perm);
     237           13 :     return true;
     238              : }
     239              : 
     240           44 : static inline bool build_outer_interleaver(poporon_ldpc_t *ldpc)
     241              : {
     242              :     poporon_rng_t *rng;
     243              :     uint32_t seed, rval, temp;
     244              :     size_t i, j;
     245              : 
     246           44 :     if (!ldpc->config.use_outer_interleave) {
     247           29 :         ldpc->outer_interleaver.forward = NULL;
     248           29 :         ldpc->outer_interleaver.inverse = NULL;
     249           29 :         ldpc->outer_interleaver.size = 0;
     250           29 :         return true;
     251              :     }
     252              : 
     253           15 :     ldpc->outer_interleaver.size = ldpc->info_bytes;
     254           15 :     ldpc->outer_interleaver.forward = (uint32_t *)pmalloc(ldpc->info_bytes * sizeof(uint32_t));
     255           15 :     ldpc->outer_interleaver.inverse = (uint32_t *)pmalloc(ldpc->info_bytes * sizeof(uint32_t));
     256              : 
     257           15 :     if (!ldpc->outer_interleaver.forward || !ldpc->outer_interleaver.inverse) {
     258            0 :         return false;
     259              :     }
     260              : 
     261         1935 :     for (i = 0; i < ldpc->info_bytes; i++) {
     262         1920 :         ldpc->outer_interleaver.forward[i] = (uint32_t)i;
     263              :     }
     264              : 
     265           15 :     seed = (uint32_t)(ldpc->config.seed ^ (uint64_t)(ldpc->info_bits ^ 0xDEADBEEF));
     266           15 :     rng = poporon_rng_create(XOSHIRO128PP, &seed, sizeof(seed));
     267           15 :     if (!rng) {
     268            0 :         return false;
     269              :     }
     270              : 
     271         1920 :     for (i = ldpc->info_bytes - 1; i > 0; i--) {
     272         1905 :         poporon_rng_next(rng, &rval, sizeof(rval));
     273         1905 :         j = rval % (i + 1);
     274         1905 :         temp = ldpc->outer_interleaver.forward[i];
     275         1905 :         ldpc->outer_interleaver.forward[i] = ldpc->outer_interleaver.forward[j];
     276         1905 :         ldpc->outer_interleaver.forward[j] = temp;
     277              :     }
     278           15 :     poporon_rng_destroy(rng);
     279              : 
     280         1935 :     for (i = 0; i < ldpc->info_bytes; i++) {
     281         1920 :         ldpc->outer_interleaver.inverse[ldpc->outer_interleaver.forward[i]] = (uint32_t)i;
     282              :     }
     283              : 
     284           15 :     return true;
     285              : }
     286              : 
     287           36 : static inline bool build_parity_check_matrix_random(poporon_ldpc_t *ldpc, uint32_t col_weight)
     288              : {
     289              :     poporon_rng_t *rng;
     290              :     uint32_t col, seed, rval, *col_counts;
     291              :     size_t i, j, target_row, num_info_edges, num_parity_edges, total_edges, parity_col, idx;
     292              : 
     293           36 :     ldpc->parity_matrix.num_bits = (uint32_t)ldpc->codeword_bits;
     294           36 :     ldpc->parity_matrix.num_checks = (uint32_t)ldpc->parity_bits;
     295              : 
     296           36 :     num_info_edges = ldpc->info_bits * col_weight;
     297           36 :     num_parity_edges = ldpc->parity_bits * 2 - 1;
     298           36 :     total_edges = num_info_edges + num_parity_edges;
     299              : 
     300           36 :     ldpc->parity_matrix.num_edges = (uint32_t)total_edges;
     301              : 
     302           36 :     ldpc->parity_matrix.row_ptr = (uint32_t *)pcalloc(ldpc->parity_matrix.num_checks + 1, sizeof(uint32_t));
     303           36 :     ldpc->parity_matrix.col_idx = (uint32_t *)pmalloc(ldpc->parity_matrix.num_edges * sizeof(uint32_t));
     304              : 
     305           36 :     if (!ldpc->parity_matrix.row_ptr || !ldpc->parity_matrix.col_idx) {
     306            0 :         return false;
     307              :     }
     308              : 
     309           36 :     col_counts = (uint32_t *)pcalloc(ldpc->parity_matrix.num_checks, sizeof(uint32_t));
     310           36 :     if (!col_counts) {
     311            0 :         return false;
     312              :     }
     313              : 
     314           36 :     seed = (uint32_t)ldpc->config.seed;
     315           36 :     rng = poporon_rng_create(XOSHIRO128PP, &seed, sizeof(seed));
     316           36 :     if (!rng) {
     317            0 :         pfree(col_counts);
     318            0 :         return false;
     319              :     }
     320              : 
     321        33316 :     for (i = 0; i < ldpc->info_bits; i++) {
     322       165888 :         for (j = 0; j < col_weight; j++) {
     323       132608 :             poporon_rng_next(rng, &rval, sizeof(rval));
     324       132608 :             target_row = rval % ldpc->parity_bits;
     325       132608 :             col_counts[target_row]++;
     326              :         }
     327              :     }
     328           36 :     poporon_rng_destroy(rng);
     329              : 
     330        31334 :     for (i = 0; i < ldpc->parity_bits; i++) {
     331        31298 :         if (i == 0) {
     332           36 :             col_counts[i] += 1;
     333              :         } else {
     334        31262 :             col_counts[i] += 2;
     335              :         }
     336              :     }
     337              : 
     338           36 :     ldpc->parity_matrix.row_ptr[0] = 0;
     339        31334 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     340        31298 :         ldpc->parity_matrix.row_ptr[i + 1] = ldpc->parity_matrix.row_ptr[i] + col_counts[i];
     341              :     }
     342              : 
     343           36 :     pmemset(col_counts, 0, ldpc->parity_matrix.num_checks * sizeof(uint32_t));
     344              : 
     345           36 :     seed = (uint32_t)ldpc->config.seed;
     346           36 :     rng = poporon_rng_create(XOSHIRO128PP, &seed, sizeof(seed));
     347           36 :     if (!rng) {
     348            0 :         pfree(col_counts);
     349            0 :         return false;
     350              :     }
     351              : 
     352        33316 :     for (i = 0; i < ldpc->info_bits; i++) {
     353       165888 :         for (j = 0; j < col_weight; j++) {
     354       132608 :             poporon_rng_next(rng, &rval, sizeof(rval));
     355       132608 :             target_row = rval % ldpc->parity_bits;
     356       132608 :             ldpc->parity_matrix.col_idx[ldpc->parity_matrix.row_ptr[target_row] + col_counts[target_row]] = (uint32_t)i;
     357       132608 :             col_counts[target_row]++;
     358              :         }
     359              :     }
     360              : 
     361        31334 :     for (i = 0; i < ldpc->parity_bits; i++) {
     362        31298 :         parity_col = ldpc->info_bits + i;
     363              : 
     364        31298 :         if (i > 0) {
     365        31262 :             ldpc->parity_matrix.col_idx[ldpc->parity_matrix.row_ptr[i] + col_counts[i]] =
     366        31262 :                 (uint32_t)(ldpc->info_bits + i - 1);
     367        31262 :             col_counts[i]++;
     368              :         }
     369              : 
     370        31298 :         ldpc->parity_matrix.col_idx[ldpc->parity_matrix.row_ptr[i] + col_counts[i]] = (uint32_t)parity_col;
     371        31298 :         col_counts[i]++;
     372              :     }
     373              : 
     374           36 :     poporon_rng_destroy(rng);
     375           36 :     pfree(col_counts);
     376              : 
     377           36 :     ldpc->parity_matrix_cols.col_ptr = (uint32_t *)pcalloc(ldpc->parity_matrix.num_bits + 1, sizeof(uint32_t));
     378           36 :     ldpc->parity_matrix_cols.row_idx = (uint32_t *)pmalloc(ldpc->parity_matrix.num_edges * sizeof(uint32_t));
     379           36 :     ldpc->parity_matrix_cols.edge_idx = (uint32_t *)pmalloc(ldpc->parity_matrix.num_edges * sizeof(uint32_t));
     380              : 
     381           36 :     if (!ldpc->parity_matrix_cols.col_ptr || !ldpc->parity_matrix_cols.row_idx || !ldpc->parity_matrix_cols.edge_idx) {
     382            0 :         return false;
     383              :     }
     384              : 
     385           36 :     col_counts = (uint32_t *)pcalloc(ldpc->parity_matrix.num_bits, sizeof(uint32_t));
     386           36 :     if (!col_counts) {
     387            0 :         return false;
     388              :     }
     389              : 
     390        31334 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     391       226466 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     392       195168 :             col_counts[ldpc->parity_matrix.col_idx[j]]++;
     393              :         }
     394              :     }
     395              : 
     396           36 :     ldpc->parity_matrix_cols.col_ptr[0] = 0;
     397        64614 :     for (i = 0; i < ldpc->parity_matrix.num_bits; i++) {
     398        64578 :         ldpc->parity_matrix_cols.col_ptr[i + 1] = ldpc->parity_matrix_cols.col_ptr[i] + col_counts[i];
     399        64578 :         col_counts[i] = 0;
     400              :     }
     401              : 
     402        31334 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     403       226466 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     404       195168 :             col = ldpc->parity_matrix.col_idx[j];
     405       195168 :             idx = ldpc->parity_matrix_cols.col_ptr[col] + col_counts[col];
     406       195168 :             ldpc->parity_matrix_cols.row_idx[idx] = (uint32_t)i;
     407       195168 :             ldpc->parity_matrix_cols.edge_idx[idx] = (uint32_t)j;
     408       195168 :             col_counts[col]++;
     409              :         }
     410              :     }
     411              : 
     412           36 :     pfree(col_counts);
     413              : 
     414           36 :     return true;
     415              : }
     416              : 
     417            8 : static inline bool build_parity_check_matrix_qc(poporon_ldpc_t *ldpc, uint32_t col_weight)
     418              : {
     419              :     poporon_rng_t *rng;
     420              :     uint32_t seed, rval;
     421              :     uint32_t col, lifting_factor, base_rows, base_info_cols, *col_counts, shift, row_in_block, block_row, base_col,
     422              :         pos_in_block;
     423              :     size_t i, j, target_row, num_info_edges, num_parity_edges, total_edges, parity_col, idx;
     424              : 
     425            8 :     ldpc->parity_matrix.num_bits = (uint32_t)ldpc->codeword_bits;
     426            8 :     ldpc->parity_matrix.num_checks = (uint32_t)ldpc->parity_bits;
     427              : 
     428            8 :     lifting_factor = ldpc->config.lifting_factor;
     429            8 :     if (lifting_factor == 0) {
     430            8 :         lifting_factor = (uint32_t)(ldpc->parity_bits / AUTO_LIFTING_FACTOR_DIVISOR);
     431            8 :         if (lifting_factor < MIN_LIFTING_FACTOR) {
     432            0 :             lifting_factor = MIN_LIFTING_FACTOR;
     433              :         }
     434            8 :         if (lifting_factor > MAX_LIFTING_FACTOR) {
     435            0 :             lifting_factor = MAX_LIFTING_FACTOR;
     436              :         }
     437           12 :         while ((lifting_factor & (lifting_factor - 1)) != 0) {
     438            4 :             lifting_factor &= lifting_factor - 1;
     439              :         }
     440              :     }
     441              : 
     442            8 :     base_rows = (uint32_t)((ldpc->parity_bits + lifting_factor - 1) / lifting_factor);
     443            8 :     base_info_cols = (uint32_t)((ldpc->info_bits + lifting_factor - 1) / lifting_factor);
     444              :     (void)base_info_cols;
     445              : 
     446            8 :     num_info_edges = ldpc->info_bits * col_weight;
     447            8 :     num_parity_edges = ldpc->parity_bits * 2 - 1;
     448            8 :     total_edges = num_info_edges + num_parity_edges;
     449              : 
     450            8 :     ldpc->parity_matrix.num_edges = (uint32_t)total_edges;
     451              : 
     452            8 :     ldpc->parity_matrix.row_ptr = (uint32_t *)pcalloc(ldpc->parity_matrix.num_checks + 1, sizeof(uint32_t));
     453            8 :     ldpc->parity_matrix.col_idx = (uint32_t *)pmalloc(ldpc->parity_matrix.num_edges * sizeof(uint32_t));
     454              : 
     455            8 :     if (!ldpc->parity_matrix.row_ptr || !ldpc->parity_matrix.col_idx) {
     456            0 :         return false;
     457              :     }
     458              : 
     459            8 :     col_counts = (uint32_t *)pcalloc(ldpc->parity_matrix.num_checks, sizeof(uint32_t));
     460            8 :     if (!col_counts) {
     461            0 :         return false;
     462              :     }
     463              : 
     464            8 :     seed = (uint32_t)ldpc->config.seed;
     465            8 :     rng = poporon_rng_create(XOSHIRO128PP, &seed, sizeof(seed));
     466            8 :     if (!rng) {
     467            0 :         pfree(col_counts);
     468            0 :         return false;
     469              :     }
     470         8200 :     for (i = 0; i < ldpc->info_bits; i++) {
     471         8192 :         base_col = (uint32_t)(i / lifting_factor);
     472         8192 :         pos_in_block = (uint32_t)(i % lifting_factor);
     473              :         (void)base_col;
     474              : 
     475        36864 :         for (j = 0; j < col_weight; j++) {
     476        28672 :             poporon_rng_next(rng, &rval, sizeof(rval));
     477        28672 :             block_row = rval % base_rows;
     478        28672 :             poporon_rng_next(rng, &rval, sizeof(rval));
     479        28672 :             shift = rval % lifting_factor;
     480              : 
     481        28672 :             row_in_block = (pos_in_block + shift) % lifting_factor;
     482        28672 :             target_row = block_row * lifting_factor + row_in_block;
     483              : 
     484        28672 :             if (target_row < ldpc->parity_bits) {
     485        28509 :                 col_counts[target_row]++;
     486              :             }
     487              :         }
     488              :     }
     489              : 
     490         7209 :     for (i = 0; i < ldpc->parity_bits; i++) {
     491         7201 :         if (i == 0) {
     492            8 :             col_counts[i] += 1;
     493              :         } else {
     494         7193 :             col_counts[i] += 2;
     495              :         }
     496              :     }
     497              : 
     498            8 :     ldpc->parity_matrix.row_ptr[0] = 0;
     499         7209 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     500         7201 :         ldpc->parity_matrix.row_ptr[i + 1] = ldpc->parity_matrix.row_ptr[i] + col_counts[i];
     501              :     }
     502              : 
     503            8 :     pmemset(col_counts, 0, ldpc->parity_matrix.num_checks * sizeof(uint32_t));
     504              : 
     505            8 :     poporon_rng_destroy(rng);
     506            8 :     seed = (uint32_t)ldpc->config.seed;
     507            8 :     rng = poporon_rng_create(XOSHIRO128PP, &seed, sizeof(seed));
     508            8 :     if (!rng) {
     509            0 :         pfree(col_counts);
     510            0 :         return false;
     511              :     }
     512         8200 :     for (i = 0; i < ldpc->info_bits; i++) {
     513         8192 :         pos_in_block = (uint32_t)(i % lifting_factor);
     514              : 
     515        36864 :         for (j = 0; j < col_weight; j++) {
     516        28672 :             poporon_rng_next(rng, &rval, sizeof(rval));
     517        28672 :             block_row = rval % base_rows;
     518        28672 :             poporon_rng_next(rng, &rval, sizeof(rval));
     519        28672 :             shift = rval % lifting_factor;
     520              : 
     521        28672 :             row_in_block = (pos_in_block + shift) % lifting_factor;
     522        28672 :             target_row = block_row * lifting_factor + row_in_block;
     523              : 
     524        28672 :             if (target_row < ldpc->parity_bits) {
     525        28509 :                 ldpc->parity_matrix.col_idx[ldpc->parity_matrix.row_ptr[target_row] + col_counts[target_row]] =
     526        28509 :                     (uint32_t)i;
     527        28509 :                 col_counts[target_row]++;
     528              :             }
     529              :         }
     530              :     }
     531              : 
     532         7209 :     for (i = 0; i < ldpc->parity_bits; i++) {
     533         7201 :         parity_col = ldpc->info_bits + i;
     534              : 
     535         7201 :         if (i > 0) {
     536         7193 :             ldpc->parity_matrix.col_idx[ldpc->parity_matrix.row_ptr[i] + col_counts[i]] =
     537         7193 :                 (uint32_t)(ldpc->info_bits + i - 1);
     538         7193 :             col_counts[i]++;
     539              :         }
     540              : 
     541         7201 :         ldpc->parity_matrix.col_idx[ldpc->parity_matrix.row_ptr[i] + col_counts[i]] = (uint32_t)parity_col;
     542         7201 :         col_counts[i]++;
     543              :     }
     544              : 
     545            8 :     poporon_rng_destroy(rng);
     546            8 :     pfree(col_counts);
     547              : 
     548            8 :     ldpc->parity_matrix_cols.col_ptr = (uint32_t *)pcalloc(ldpc->parity_matrix.num_bits + 1, sizeof(uint32_t));
     549            8 :     ldpc->parity_matrix_cols.row_idx = (uint32_t *)pmalloc(ldpc->parity_matrix.num_edges * sizeof(uint32_t));
     550            8 :     ldpc->parity_matrix_cols.edge_idx = (uint32_t *)pmalloc(ldpc->parity_matrix.num_edges * sizeof(uint32_t));
     551              : 
     552            8 :     if (!ldpc->parity_matrix_cols.col_ptr || !ldpc->parity_matrix_cols.row_idx || !ldpc->parity_matrix_cols.edge_idx) {
     553            0 :         return false;
     554              :     }
     555              : 
     556            8 :     col_counts = (uint32_t *)pcalloc(ldpc->parity_matrix.num_bits, sizeof(uint32_t));
     557            8 :     if (!col_counts) {
     558            0 :         return false;
     559              :     }
     560              : 
     561         7209 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     562        50104 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     563        42903 :             col_counts[ldpc->parity_matrix.col_idx[j]]++;
     564              :         }
     565              :     }
     566              : 
     567            8 :     ldpc->parity_matrix_cols.col_ptr[0] = 0;
     568        15401 :     for (i = 0; i < ldpc->parity_matrix.num_bits; i++) {
     569        15393 :         ldpc->parity_matrix_cols.col_ptr[i + 1] = ldpc->parity_matrix_cols.col_ptr[i] + col_counts[i];
     570        15393 :         col_counts[i] = 0;
     571              :     }
     572              : 
     573         7209 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     574        50104 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     575        42903 :             col = ldpc->parity_matrix.col_idx[j];
     576        42903 :             idx = ldpc->parity_matrix_cols.col_ptr[col] + col_counts[col];
     577        42903 :             ldpc->parity_matrix_cols.row_idx[idx] = (uint32_t)i;
     578        42903 :             ldpc->parity_matrix_cols.edge_idx[idx] = (uint32_t)j;
     579        42903 :             col_counts[col]++;
     580              :         }
     581              :     }
     582              : 
     583            8 :     pfree(col_counts);
     584              : 
     585            8 :     return true;
     586              : }
     587              : 
     588           44 : static inline bool build_parity_check_matrix(poporon_ldpc_t *ldpc)
     589              : {
     590           44 :     uint32_t col_weight = ldpc->config.column_weight;
     591              : 
     592           44 :     if (col_weight < MIN_COL_WEIGHT) {
     593            0 :         col_weight = MIN_COL_WEIGHT;
     594           44 :     } else if (col_weight > MAX_COL_WEIGHT) {
     595            0 :         col_weight = MAX_COL_WEIGHT;
     596              :     }
     597              : 
     598           44 :     switch (ldpc->config.matrix_type) {
     599            8 :     case PPRN_LDPC_QC_RANDOM:
     600            8 :         return build_parity_check_matrix_qc(ldpc, col_weight);
     601           36 :     case PPRN_LDPC_RANDOM:
     602              :     default:
     603           36 :         return build_parity_check_matrix_random(ldpc, col_weight);
     604              :     }
     605              : }
     606              : 
     607           44 : static inline bool allocate_messages(poporon_ldpc_t *ldpc)
     608              : {
     609           44 :     ldpc->msg.check_to_var = (int16_t *)pcalloc(ldpc->parity_matrix.num_edges, sizeof(int16_t));
     610           44 :     ldpc->msg.var_to_check = (int16_t *)pcalloc(ldpc->parity_matrix.num_edges, sizeof(int16_t));
     611           44 :     ldpc->msg.llr_total = (int16_t *)pcalloc(ldpc->parity_matrix.num_bits, sizeof(int16_t));
     612           44 :     ldpc->temp_codeword = (uint8_t *)pmalloc(ldpc->codeword_bytes);
     613              : 
     614           44 :     if (!ldpc->msg.check_to_var || !ldpc->msg.var_to_check || !ldpc->msg.llr_total || !ldpc->temp_codeword) {
     615            0 :         return false;
     616              :     }
     617              : 
     618           44 :     if (ldpc->config.use_inner_interleave) {
     619           13 :         ldpc->temp_interleaved = (uint8_t *)pmalloc(ldpc->codeword_bytes);
     620           13 :         if (!ldpc->temp_interleaved) {
     621            0 :             return false;
     622              :         }
     623              :     } else {
     624           31 :         ldpc->temp_interleaved = NULL;
     625              :     }
     626              : 
     627           44 :     if (ldpc->config.use_outer_interleave) {
     628           15 :         ldpc->temp_outer = (uint8_t *)pmalloc(ldpc->info_bytes);
     629           15 :         if (!ldpc->temp_outer) {
     630            0 :             return false;
     631              :         }
     632              :     } else {
     633           29 :         ldpc->temp_outer = NULL;
     634              :     }
     635              : 
     636           44 :     return true;
     637              : }
     638              : 
     639          119 : static inline bool check_syndrome(const poporon_ldpc_t *ldpc, const uint8_t *codeword)
     640              : {
     641              :     uint8_t syndrome_bit;
     642              :     size_t i, j;
     643              : 
     644        81009 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     645        80949 :         syndrome_bit = 0;
     646              : 
     647       617356 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     648       536407 :             syndrome_bit ^= get_bit(codeword, ldpc->parity_matrix.col_idx[j]);
     649              :         }
     650              : 
     651        80949 :         if (syndrome_bit != 0) {
     652           59 :             return false;
     653              :         }
     654              :     }
     655              : 
     656           60 :     return true;
     657              : }
     658              : 
                        /*
                         * Seed the decoder state from soft channel input.
                         *
                         * Each llr[i] is multiplied by LLR_SCALE_FACTOR (the SIMD paths use the
                         * equivalent left shift by 8), limited against LLR_MAX / LLR_MIN (the
                         * scalar paths call ldpc_saturate instead) and stored in msg.llr_total[i].
                         * Every var_to_check edge belonging to bit i is then set to llr_total[i],
                         * and check_to_var is zeroed.
                         *
                         * The SIMD loops consume 16 (AVX2) or 8 (NEON / WASM) elements per step;
                         * a scalar loop handles whatever remains.
                         */
     659            4 : static inline void initialize_messages_soft(poporon_ldpc_t *ldpc, const int8_t *llr)
     660              : {
     661              :     int16_t llr_val;
     662              :     size_t i, j;
     663              : 
     664              : #if POPORON_USE_SIMD && defined(POPORON_SIMD_AVX2)
     665              :     {
     666            4 :         __m256i vmax = _mm256_set1_epi16(LLR_MAX);
     667            4 :         __m256i vmin = _mm256_set1_epi16(LLR_MIN);
     668              : 
                        /* Widen 16 int8 values to int16, scale by 256, then clamp. */
     669          388 :         for (i = 0; i + 15 < ldpc->codeword_bits; i += 16) {
     670          768 :             __m128i in8 = _mm_loadu_si128((const __m128i *)&llr[i]);
     671          384 :             __m256i in16 = _mm256_cvtepi8_epi16(in8);
     672          384 :             __m256i scaled = _mm256_slli_epi16(in16, 8);
     673          384 :             scaled = _mm256_min_epi16(scaled, vmax);
     674          384 :             scaled = _mm256_max_epi16(scaled, vmin);
     675          384 :             _mm256_storeu_si256((__m256i *)&ldpc->msg.llr_total[i], scaled);
     676              :         }
     677              : 
                        /* Scalar tail for the last (codeword_bits % 16) elements. */
     678            4 :         for (; i < ldpc->codeword_bits; i++) {
     679            0 :             llr_val = (int16_t)llr[i] * LLR_SCALE_FACTOR;
     680            0 :             ldpc->msg.llr_total[i] = ldpc_saturate(llr_val);
     681              :         }
     682              :     }
     683              : #elif POPORON_USE_SIMD && (defined(POPORON_SIMD_NEON) || defined(POPORON_SIMD_WASM))
     684              :     {
     685              : #if defined(POPORON_SIMD_NEON)
     686              :         int16x8_t vmax = vdupq_n_s16(LLR_MAX);
     687              :         int16x8_t vmin = vdupq_n_s16(LLR_MIN);
     688              : 
                        /* Same widen / shift / clamp sequence, 8 elements per step. */
     689              :         for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     690              :             int8x8_t in8 = vld1_s8(&llr[i]);
     691              :             int16x8_t in16 = vmovl_s8(in8);
     692              :             int16x8_t scaled = vshlq_n_s16(in16, 8);
     693              :             scaled = vminq_s16(scaled, vmax);
     694              :             scaled = vmaxq_s16(scaled, vmin);
     695              :             vst1q_s16(&ldpc->msg.llr_total[i], scaled);
     696              :         }
     697              : #elif defined(POPORON_SIMD_WASM)
     698              :         v128_t vmax = wasm_i16x8_splat(LLR_MAX);
     699              :         v128_t vmin = wasm_i16x8_splat(LLR_MIN);
     700              : 
                        /* Same widen / shift / clamp sequence, 8 elements per step. */
     701              :         for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     702              :             v128_t in8 = wasm_v128_load64_zero(&llr[i]);
     703              :             v128_t in16 = wasm_i16x8_extend_low_i8x16(in8);
     704              :             v128_t scaled = wasm_i16x8_shl(in16, 8);
     705              :             scaled = wasm_i16x8_min(scaled, vmax);
     706              :             scaled = wasm_i16x8_max(scaled, vmin);
     707              :             wasm_v128_store(&ldpc->msg.llr_total[i], scaled);
     708              :         }
     709              : #endif
     710              : 
                        /* Scalar tail shared by the NEON and WASM paths. */
     711              :         for (; i < ldpc->codeword_bits; i++) {
     712              :             llr_val = (int16_t)llr[i] * LLR_SCALE_FACTOR;
     713              :             ldpc->msg.llr_total[i] = ldpc_saturate(llr_val);
     714              :         }
     715              :     }
     716              : #else
                    /* Portable path: scale and saturate one element at a time. */
     717              :     for (i = 0; i < ldpc->codeword_bits; i++) {
     718              :         llr_val = (int16_t)llr[i] * LLR_SCALE_FACTOR;
     719              :         ldpc->msg.llr_total[i] = ldpc_saturate(llr_val);
     720              :     }
     721              : #endif
     722              : 
                    /* The first variable-to-check message on every edge is the bit's own LLR. */
     723         6148 :     for (i = 0; i < ldpc->parity_matrix.num_bits; i++) {
     724        29692 :         for (j = ldpc->parity_matrix_cols.col_ptr[i]; j < ldpc->parity_matrix_cols.col_ptr[i + 1]; j++) {
     725        23548 :             ldpc->msg.var_to_check[ldpc->parity_matrix_cols.edge_idx[j]] = ldpc->msg.llr_total[i];
     726              :         }
     727              :     }
     728              : 
     729            4 :     pmemset(ldpc->msg.check_to_var, 0, ldpc->parity_matrix.num_edges * sizeof(int16_t));
     730            4 : }
     731              : 
                        /*
                         * Seed the decoder state from a hard-decision (bit-packed) codeword.
                         *
                         * A set bit becomes -LLR_INFINITY and a clear bit +LLR_INFINITY in
                         * msg.llr_total. Every var_to_check edge belonging to bit i is then set to
                         * llr_total[i], and check_to_var is zeroed.
                         *
                         * The SIMD paths expand one input byte into eight int16 values per step,
                         * with mask 0x80 mapped to element 0 (assumes get_bit uses the same
                         * MSB-first order within a byte - TODO confirm). A scalar loop handles any
                         * bits left over when codeword_bits is not a multiple of 8.
                         */
     732           16 : static inline void initialize_messages_hard(poporon_ldpc_t *ldpc, const uint8_t *codeword)
     733              : {
     734              :     int16_t llr_val;
     735              :     size_t i, j;
     736              : 
     737              : #if POPORON_USE_SIMD && (defined(POPORON_SIMD_AVX2) || defined(POPORON_SIMD_NEON) || defined(POPORON_SIMD_WASM))
     738              :     {
                        /* One mask per output element: element k tests bit (0x80 >> k) of the byte. */
     739              :         static const int16_t bit_masks[8] = {0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001};
     740              : 
     741              : #if defined(POPORON_SIMD_AVX2)
     742           16 :         __m128i vmasks = _mm_loadu_si128((const __m128i *)bit_masks);
     743           16 :         __m128i vpos = _mm_set1_epi16(LLR_INFINITY);
     744           16 :         __m128i vneg = _mm_set1_epi16(-LLR_INFINITY);
     745              : 
     746         6800 :         for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     747         6784 :             uint8_t byte_val = codeword[i / 8];
     748        13568 :             __m128i vbyte = _mm_set1_epi16((int16_t)byte_val);
     749         6784 :             __m128i vbits = _mm_and_si128(vbyte, vmasks);
                        /* All-ones lane where the tested bit is set; those lanes take vneg. */
     750         6784 :             __m128i vis_set = _mm_cmpeq_epi16(vbits, vmasks);
     751         6784 :             __m128i result = _mm_blendv_epi8(vpos, vneg, vis_set);
     752         6784 :             _mm_storeu_si128((__m128i *)&ldpc->msg.llr_total[i], result);
     753              :         }
     754              : #elif defined(POPORON_SIMD_NEON)
     755              :         int16x8_t vmasks = vld1q_s16(bit_masks);
     756              :         int16x8_t vpos = vdupq_n_s16(LLR_INFINITY);
     757              :         int16x8_t vneg = vdupq_n_s16(-LLR_INFINITY);
     758              : 
     759              :         for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     760              :             uint8_t byte_val = codeword[i / 8];
     761              :             int16x8_t vbyte = vdupq_n_s16((int16_t)byte_val);
     762              :             int16x8_t vbits = vandq_s16(vbyte, vmasks);
     763              :             uint16x8_t vis_set = vceqq_s16(vbits, vmasks);
     764              :             int16x8_t result = vbslq_s16(vis_set, vneg, vpos);
     765              :             vst1q_s16(&ldpc->msg.llr_total[i], result);
     766              :         }
     767              : #elif defined(POPORON_SIMD_WASM)
     768              :         v128_t vmasks = wasm_v128_load(bit_masks);
     769              :         v128_t vpos = wasm_i16x8_splat(LLR_INFINITY);
     770              :         v128_t vneg = wasm_i16x8_splat(-LLR_INFINITY);
     771              : 
     772              :         for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     773              :             uint8_t byte_val = codeword[i / 8];
     774              :             v128_t vbyte = wasm_i16x8_splat((int16_t)byte_val);
     775              :             v128_t vbits = wasm_v128_and(vbyte, vmasks);
     776              :             v128_t vis_set = wasm_i16x8_eq(vbits, vmasks);
     777              :             v128_t result = wasm_v128_bitselect(vneg, vpos, vis_set);
     778              :             wasm_v128_store(&ldpc->msg.llr_total[i], result);
     779              :         }
     780              : #endif
     781              : 
                        /* Scalar tail for the last (codeword_bits % 8) bits. */
     782           16 :         for (; i < ldpc->codeword_bits; i++) {
     783            0 :             llr_val = get_bit(codeword, i) ? -LLR_INFINITY : LLR_INFINITY;
     784            0 :             ldpc->msg.llr_total[i] = llr_val;
     785              :         }
     786              :     }
     787              : #else
                    /* Portable path: one bit at a time. */
     788              :     for (i = 0; i < ldpc->codeword_bits; i++) {
     789              :         llr_val = get_bit(codeword, i) ? -LLR_INFINITY : LLR_INFINITY;
     790              :         ldpc->msg.llr_total[i] = llr_val;
     791              :     }
     792              : #endif
     793              : 
                    /* The first variable-to-check message on every edge is the bit's own LLR. */
     794        54288 :     for (i = 0; i < ldpc->parity_matrix.num_bits; i++) {
     795       243184 :         for (j = ldpc->parity_matrix_cols.col_ptr[i]; j < ldpc->parity_matrix_cols.col_ptr[i + 1]; j++) {
     796       188912 :             ldpc->msg.var_to_check[ldpc->parity_matrix_cols.edge_idx[j]] = ldpc->msg.llr_total[i];
     797              :         }
     798              :     }
     799              : 
     800           16 :     pmemset(ldpc->msg.check_to_var, 0, ldpc->parity_matrix.num_edges * sizeof(int16_t));
     801           16 : }
     802              : 
     803           59 : static inline void check_node_update(poporon_ldpc_t *ldpc)
     804              : {
     805              :     uint32_t min1_idx;
     806              :     int16_t min1, min2, sign, msg, abs_msg;
     807              :     size_t i, j, k;
     808              : 
     809        95291 :     for (i = 0; i < ldpc->parity_matrix.num_checks; i++) {
     810        95232 :         sign = 1;
     811        95232 :         min1 = LLR_MAX;
     812        95232 :         min2 = LLR_MAX;
     813        95232 :         min1_idx = 0;
     814              : 
     815       792517 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     816       697285 :             msg = ldpc->msg.var_to_check[j];
     817       697285 :             if (msg < 0) {
     818       347813 :                 sign = -sign;
     819       347813 :                 abs_msg = -msg;
     820              :             } else {
     821       349472 :                 abs_msg = msg;
     822              :             }
     823              : 
     824       697285 :             if (abs_msg < min1) {
     825        44261 :                 min2 = min1;
     826        44261 :                 min1 = abs_msg;
     827        44261 :                 min1_idx = (uint32_t)j;
     828       653024 :             } else if (abs_msg < min2) {
     829        32529 :                 min2 = abs_msg;
     830              :             }
     831              :         }
     832              : 
     833       792517 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
     834       697285 :             msg = ldpc->msg.var_to_check[j];
     835              : 
     836       697285 :             if (j == min1_idx) {
     837        41505 :                 abs_msg = min2;
     838              :             } else {
     839       655780 :                 abs_msg = min1;
     840              :             }
     841              : 
     842       697285 :             abs_msg = (int16_t)((int32_t)abs_msg * MINSUM_ALPHA_NUMERATOR / MINSUM_ALPHA_DENOMINATOR);
     843              : 
     844       697285 :             k = (msg < 0) ? -sign : sign;
     845       697285 :             ldpc->msg.check_to_var[j] = (int16_t)(k * abs_msg);
     846              :         }
     847              :     }
     848           59 : }
     849              : 
     850           59 : static inline void variable_node_update(poporon_ldpc_t *ldpc, const int8_t *channel_llr)
     851              : {
     852              :     int32_t sum;
     853              :     int16_t channel;
     854              :     size_t i, j;
     855              : 
     856       190523 :     for (i = 0; i < ldpc->parity_matrix.num_bits; i++) {
     857       190464 :         if (channel_llr) {
     858         6144 :             channel = (int16_t)channel_llr[i] * LLR_SCALE_FACTOR;
     859              :         } else {
     860       184320 :             channel = ldpc->msg.llr_total[i];
     861              :         }
     862              : 
     863       190464 :         sum = channel;
     864              : 
     865       887749 :         for (j = ldpc->parity_matrix_cols.col_ptr[i]; j < ldpc->parity_matrix_cols.col_ptr[i + 1]; j++) {
     866       697285 :             sum += ldpc->msg.check_to_var[ldpc->parity_matrix_cols.edge_idx[j]];
     867              :         }
     868              : 
     869       190464 :         ldpc->msg.llr_total[i] = ldpc_saturate(sum);
     870              : 
     871       887749 :         for (j = ldpc->parity_matrix_cols.col_ptr[i]; j < ldpc->parity_matrix_cols.col_ptr[i + 1]; j++) {
     872       697285 :             ldpc->msg.var_to_check[ldpc->parity_matrix_cols.edge_idx[j]] =
     873       697285 :                 ldpc_saturate(sum - ldpc->msg.check_to_var[ldpc->parity_matrix_cols.edge_idx[j]]);
     874              :         }
     875              :     }
     876           59 : }
     877              : 
                        /*
                         * Turn the current per-bit totals into a bit-packed codeword.
                         *
                         * codeword is cleared first; bit i is then 1 exactly when
                         * msg.llr_total[i] is negative.
                         *
                         * The SIMD paths produce one output byte from eight int16 values per
                         * step. The AVX2 and WASM variants obtain a mask with element k in bit k
                         * and run it through bit_reverse_table so that element 0 ends up in the
                         * most significant bit; the NEON variant shifts element k left by (7 - k)
                         * and sums the lanes. A scalar loop sets any remaining bits via set_bit.
                         */
     878           59 : static inline void make_hard_decision(poporon_ldpc_t *ldpc, uint8_t *codeword)
     879              : {
     880              :     size_t i;
     881              : 
     882           59 :     pmemset(codeword, 0, ldpc->codeword_bytes);
     883              : 
     884              : #if POPORON_USE_SIMD && (defined(POPORON_SIMD_AVX2) || defined(POPORON_SIMD_NEON) || defined(POPORON_SIMD_WASM))
     885              :     {
                        /* bit_reverse_table[b] is b with its eight bits in reverse order. */
     886              :         static const uint8_t bit_reverse_table[256] = {
     887              :             0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88,
     888              :             0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4,
     889              :             0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC,
     890              :             0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
     891              :             0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A,
     892              :             0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6,
     893              :             0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
     894              :             0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
     895              :             0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85,
     896              :             0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD,
     897              :             0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3,
     898              :             0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
     899              :             0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97,
     900              :             0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
     901              :             0x3F, 0xBF, 0x7F, 0xFF};
     902              : 
     903              : #if defined(POPORON_SIMD_AVX2)
     904           59 :         __m128i vzero = _mm_setzero_si128(), v, neg, packed;
     905              :         int mask;
     906              : 
     907        23867 :         for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     908        47616 :             v = _mm_loadu_si128((const __m128i *)&ldpc->msg.llr_total[i]);
                        /* neg lanes are all-ones where llr_total < 0; pack to bytes and take their top bits. */
     909        23808 :             neg = _mm_cmpgt_epi16(vzero, v);
     910        23808 :             packed = _mm_packs_epi16(neg, neg);
     911        23808 :             mask = _mm_movemask_epi8(packed) & 0xFF;
     912        23808 :             codeword[i / 8] = bit_reverse_table[mask];
     913              :         }
     914              : #elif defined(POPORON_SIMD_NEON)
     915              :         {
     916              :             static const int16_t shift_vals[8] = {7, 6, 5, 4, 3, 2, 1, 0};
     917              :             int16x8_t vshifts = vld1q_s16(shift_vals), v;
     918              :             int16x8_t vzero = vdupq_n_s16(0);
     919              :             uint16x8_t neg, bits, shifted;
     920              : 
     921              :             for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     922              :                 v = vld1q_s16(&ldpc->msg.llr_total[i]);
     923              :                 neg = vcltq_s16(v, vzero);
                            /* Reduce each lane to 0/1, move it to its bit position, then sum the lanes. */
     924              :                 bits = vshrq_n_u16(neg, 15);
     925              :                 shifted = vshlq_u16(bits, vshifts);
     926              :                 codeword[i / 8] = (uint8_t)vaddvq_u16(shifted);
     927              :             }
     928              :         }
     929              : #elif defined(POPORON_SIMD_WASM)
     930              :         {
     931              :             v128_t vzero = wasm_i16x8_splat(0), v, neg, packed;
     932              :             int mask;
     933              : 
     934              :             for (i = 0; i + 7 < ldpc->codeword_bits; i += 8) {
     935              :                 v = wasm_v128_load(&ldpc->msg.llr_total[i]);
     936              :                 neg = wasm_i16x8_lt(v, vzero);
     937              :                 packed = wasm_i8x16_narrow_i16x8(neg, neg);
     938              :                 mask = wasm_i8x16_bitmask(packed) & 0xFF;
     939              :                 codeword[i / 8] = bit_reverse_table[mask];
     940              :             }
     941              :         }
     942              : #endif
     943              : 
                        /* Scalar tail; relies on the pmemset above having cleared these bits. */
     944           59 :         for (; i < ldpc->codeword_bits; i++) {
     945            0 :             if (ldpc->msg.llr_total[i] < 0) {
     946            0 :                 set_bit(codeword, i, 1);
     947              :             }
     948              :         }
     949              :     }
     950              : #else
                    /* Portable path: only negative totals need a bit set, the rest stay 0. */
     951              :     for (i = 0; i < ldpc->codeword_bits; i++) {
     952              :         if (ldpc->msg.llr_total[i] < 0) {
     953              :             set_bit(codeword, i, 1);
     954              :         }
     955              :     }
     956              : #endif
     957           59 : }
     958              : 
     959           21 : extern bool poporon_ldpc_params_default(poporon_ldpc_params_t *config)
     960              : {
     961           21 :     if (!config) {
     962            0 :         return false;
     963              :     }
     964              : 
     965           21 :     config->matrix_type = PPRN_LDPC_RANDOM;
     966           21 :     config->column_weight = DEFAULT_COL_WEIGHT;
     967           21 :     config->use_inner_interleave = false;
     968           21 :     config->use_outer_interleave = false;
     969           21 :     config->interleave_depth = 0;
     970           21 :     config->lifting_factor = 0;
     971           21 :     config->seed = 0;
     972              : 
     973           21 :     return true;
     974              : }
     975              : 
     976            7 : extern bool poporon_ldpc_params_burst_resistant(poporon_ldpc_params_t *config)
     977              : {
     978            7 :     if (!config) {
     979            0 :         return false;
     980              :     }
     981              : 
     982            7 :     config->matrix_type = PPRN_LDPC_RANDOM;
     983            7 :     config->column_weight = 7;
     984            7 :     config->use_inner_interleave = true;
     985            7 :     config->use_outer_interleave = true;
     986            7 :     config->interleave_depth = 0;
     987            7 :     config->lifting_factor = 0;
     988            7 :     config->seed = 0;
     989              : 
     990            7 :     return true;
     991              : }
     992              : 
     993           47 : extern poporon_ldpc_t *poporon_ldpc_create(size_t block_size, poporon_ldpc_rate_t rate,
     994              :                                            const poporon_ldpc_params_t *config)
     995              : {
     996              :     poporon_ldpc_t *ldpc;
     997              :     poporon_ldpc_params_t default_config;
     998              :     uint32_t info_num, parity_num;
     999              : 
    1000           47 :     if (block_size < MIN_BLOCK_SIZE || block_size > MAX_BLOCK_SIZE || (block_size % 4) != 0) {
    1001            2 :         return NULL;
    1002              :     }
    1003              : 
    1004           45 :     if (rate > PPRN_LDPC_RATE_5_6) {
    1005            1 :         return NULL;
    1006              :     }
    1007              : 
    1008           44 :     get_rate_params(rate, &info_num, &parity_num);
    1009              : 
    1010           44 :     ldpc = (poporon_ldpc_t *)pcalloc(1, sizeof(poporon_ldpc_t));
    1011           44 :     if (!ldpc) {
    1012            0 :         return NULL;
    1013              :     }
    1014              : 
    1015           44 :     if (config) {
    1016           27 :         ldpc->config = *config;
    1017              :     } else {
    1018           17 :         poporon_ldpc_params_default(&default_config);
    1019           17 :         ldpc->config = default_config;
    1020              :     }
    1021              : 
    1022           44 :     ldpc->rate = rate;
    1023           44 :     ldpc->info_bits = block_size * 8;
    1024           44 :     ldpc->parity_bits = (ldpc->info_bits * parity_num) / info_num;
    1025           44 :     ldpc->codeword_bits = ldpc->info_bits + ldpc->parity_bits;
    1026           44 :     ldpc->info_bytes = block_size;
    1027           44 :     ldpc->parity_bytes = (ldpc->parity_bits + 7) / 8;
    1028           44 :     ldpc->codeword_bytes = ldpc->info_bytes + ldpc->parity_bytes;
    1029              : 
    1030           44 :     if (!build_parity_check_matrix(ldpc)) {
    1031            0 :         poporon_ldpc_destroy(ldpc);
    1032            0 :         return NULL;
    1033              :     }
    1034              : 
    1035           44 :     if (!build_interleaver(ldpc)) {
    1036            0 :         poporon_ldpc_destroy(ldpc);
    1037            0 :         return NULL;
    1038              :     }
    1039              : 
    1040           44 :     if (!build_outer_interleaver(ldpc)) {
    1041            0 :         poporon_ldpc_destroy(ldpc);
    1042            0 :         return NULL;
    1043              :     }
    1044              : 
    1045           44 :     if (!allocate_messages(ldpc)) {
    1046            0 :         poporon_ldpc_destroy(ldpc);
    1047            0 :         return NULL;
    1048              :     }
    1049              : 
    1050           44 :     return ldpc;
    1051              : }
    1052              : 
    1053           45 : extern void poporon_ldpc_destroy(poporon_ldpc_t *ldpc)
    1054              : {
    1055           45 :     if (!ldpc) {
    1056            1 :         return;
    1057              :     }
    1058              : 
    1059           44 :     pfree(ldpc->parity_matrix.row_ptr);
    1060           44 :     pfree(ldpc->parity_matrix.col_idx);
    1061           44 :     pfree(ldpc->parity_matrix_cols.col_ptr);
    1062           44 :     pfree(ldpc->parity_matrix_cols.row_idx);
    1063           44 :     pfree(ldpc->parity_matrix_cols.edge_idx);
    1064           44 :     pfree(ldpc->interleaver.forward);
    1065           44 :     pfree(ldpc->interleaver.inverse);
    1066           44 :     pfree(ldpc->outer_interleaver.forward);
    1067           44 :     pfree(ldpc->outer_interleaver.inverse);
    1068           44 :     pfree(ldpc->msg.check_to_var);
    1069           44 :     pfree(ldpc->msg.var_to_check);
    1070           44 :     pfree(ldpc->msg.llr_total);
    1071           44 :     pfree(ldpc->temp_codeword);
    1072           44 :     pfree(ldpc->temp_interleaved);
    1073           44 :     pfree(ldpc->temp_outer);
    1074           44 :     pfree(ldpc);
    1075              : }
    1076              : 
    1077           22 : extern size_t poporon_ldpc_info_size(const poporon_ldpc_t *ldpc)
    1078              : {
    1079           22 :     if (!ldpc) {
    1080            1 :         return 0;
    1081              :     }
    1082              : 
    1083           21 :     return ldpc->info_bytes;
    1084              : }
    1085              : 
    1086           21 : extern size_t poporon_ldpc_codeword_size(const poporon_ldpc_t *ldpc)
    1087              : {
    1088           21 :     if (!ldpc) {
    1089            1 :         return 0;
    1090              :     }
    1091              : 
    1092           20 :     return ldpc->codeword_bytes;
    1093              : }
    1094              : 
    1095           47 : extern size_t poporon_ldpc_parity_size(const poporon_ldpc_t *ldpc)
    1096              : {
    1097           47 :     if (!ldpc) {
    1098            1 :         return 0;
    1099              :     }
    1100              : 
    1101           46 :     return ldpc->parity_bytes;
    1102              : }
    1103              : 
    1104           40 : extern bool poporon_ldpc_encode(poporon_ldpc_t *ldpc, const uint8_t *info, uint8_t *parity)
    1105              : {
    1106              :     uint32_t col;
    1107              :     uint8_t xor_val, prev_parity, *codeword;
    1108              :     size_t i, j;
    1109              : 
    1110           40 :     if (!ldpc || !info || !parity) {
    1111            3 :         return false;
    1112              :     }
    1113              : 
    1114           37 :     codeword = ldpc->temp_codeword;
    1115           37 :     pmemset(codeword, 0, ldpc->codeword_bytes);
    1116           37 :     pmemcpy(codeword, info, ldpc->info_bytes);
    1117              : 
    1118           37 :     prev_parity = 0;
    1119              : 
    1120        34407 :     for (i = 0; i < ldpc->parity_bits; i++) {
    1121        34370 :         xor_val = 0;
    1122              : 
    1123       245758 :         for (j = ldpc->parity_matrix.row_ptr[i]; j < ldpc->parity_matrix.row_ptr[i + 1]; j++) {
    1124       211388 :             col = ldpc->parity_matrix.col_idx[j];
    1125       211388 :             if (col < ldpc->info_bits) {
    1126       142685 :                 xor_val ^= get_bit(codeword, col);
    1127              :             }
    1128              :         }
    1129              : 
    1130        34370 :         xor_val ^= prev_parity;
    1131              : 
    1132        34370 :         set_bit(codeword, ldpc->info_bits + i, xor_val);
    1133        34370 :         prev_parity = xor_val;
    1134              :     }
    1135              : 
    1136           37 :     pmemcpy(parity, codeword + ldpc->info_bytes, ldpc->parity_bytes);
    1137              : 
    1138           37 :     return true;
    1139              : }
    1140              : 
    1141           23 : extern bool poporon_ldpc_check(const poporon_ldpc_t *ldpc, const uint8_t *codeword)
    1142              : {
    1143           23 :     if (!ldpc || !codeword) {
    1144            2 :         return false;
    1145              :     }
    1146              : 
    1147           21 :     return check_syndrome(ldpc, codeword);
    1148              : }
    1149              : 
    1150           41 : extern bool poporon_ldpc_decode_hard(poporon_ldpc_t *ldpc, uint8_t *codeword, uint32_t max_iterations,
    1151              :                                      uint32_t *iterations_used)
    1152              : {
    1153              :     uint32_t iter;
    1154              :     uint8_t *working_codeword;
    1155              : 
    1156           41 :     if (!ldpc || !codeword) {
    1157            2 :         return false;
    1158              :     }
    1159              : 
    1160           39 :     if (max_iterations == 0) {
    1161           11 :         max_iterations = DEFAULT_MAX_ITERATIONS;
    1162              :     }
    1163              : 
    1164           39 :     working_codeword = ldpc->temp_codeword;
    1165           39 :     if (ldpc->config.use_inner_interleave && ldpc->interleaver.inverse) {
    1166            9 :         deinterleave_bits(ldpc, codeword, working_codeword);
    1167              :     } else {
    1168           30 :         pmemmove(working_codeword, codeword, ldpc->codeword_bytes);
    1169              :     }
    1170              : 
    1171           39 :     if (check_syndrome(ldpc, working_codeword)) {
    1172           23 :         pmemmove(codeword, working_codeword, ldpc->codeword_bytes);
    1173           23 :         if (iterations_used) {
    1174           23 :             *iterations_used = 0;
    1175              :         }
    1176           23 :         return true;
    1177              :     }
    1178              : 
    1179           16 :     initialize_messages_hard(ldpc, working_codeword);
    1180              : 
    1181           55 :     for (iter = 0; iter < max_iterations; iter++) {
    1182           55 :         check_node_update(ldpc);
    1183           55 :         variable_node_update(ldpc, NULL);
    1184              : 
    1185           55 :         make_hard_decision(ldpc, working_codeword);
    1186              : 
    1187           55 :         if (check_syndrome(ldpc, working_codeword)) {
    1188           16 :             pmemmove(codeword, working_codeword, ldpc->codeword_bytes);
    1189              : 
    1190           16 :             if (iterations_used) {
    1191           16 :                 *iterations_used = iter + 1;
    1192              :             }
    1193           16 :             return true;
    1194              :         }
    1195              :     }
    1196              : 
    1197            0 :     pmemmove(codeword, working_codeword, ldpc->codeword_bytes);
    1198              : 
    1199            0 :     if (iterations_used) {
    1200            0 :         *iterations_used = max_iterations;
    1201              :     }
    1202              : 
    1203            0 :     return false;
    1204              : }
    1205              : 
    1206            4 : extern bool poporon_ldpc_decode_soft(poporon_ldpc_t *ldpc, const int8_t *llr, uint8_t *codeword,
    1207              :                                      uint32_t max_iterations, uint32_t *iterations_used)
    1208              : {
    1209              :     uint32_t iter;
    1210              :     uint8_t *working_codeword;
    1211              :     int8_t *working_llr;
    1212              : 
    1213            4 :     if (!ldpc || !llr || !codeword) {
    1214            0 :         return false;
    1215              :     }
    1216              : 
    1217            4 :     if (max_iterations == 0) {
    1218            0 :         max_iterations = DEFAULT_MAX_ITERATIONS;
    1219              :     }
    1220              : 
    1221            4 :     working_llr = NULL;
    1222            4 :     if (ldpc->config.use_inner_interleave && ldpc->interleaver.inverse) {
    1223            2 :         working_llr = (int8_t *)pmalloc(ldpc->codeword_bits);
    1224            2 :         if (!working_llr) {
    1225            0 :             return false;
    1226              :         }
    1227            2 :         deinterleave_llr(ldpc, llr, working_llr);
    1228            2 :         initialize_messages_soft(ldpc, working_llr);
    1229              :     } else {
    1230            2 :         initialize_messages_soft(ldpc, llr);
    1231              :     }
    1232              : 
    1233            4 :     working_codeword = ldpc->temp_codeword;
    1234              : 
    1235            4 :     for (iter = 0; iter < max_iterations; iter++) {
    1236            4 :         check_node_update(ldpc);
    1237            4 :         variable_node_update(ldpc, working_llr ? working_llr : llr);
    1238              : 
    1239            4 :         make_hard_decision(ldpc, working_codeword);
    1240              : 
    1241            4 :         if (check_syndrome(ldpc, working_codeword)) {
    1242            4 :             pmemmove(codeword, working_codeword, ldpc->codeword_bytes);
    1243              : 
    1244            4 :             if (working_llr) {
    1245            2 :                 pfree(working_llr);
    1246              :             }
    1247            4 :             if (iterations_used) {
    1248            4 :                 *iterations_used = iter + 1;
    1249              :             }
    1250            4 :             return true;
    1251              :         }
    1252              :     }
    1253              : 
    1254            0 :     pmemmove(codeword, working_codeword, ldpc->codeword_bytes);
    1255              : 
    1256            0 :     if (working_llr) {
    1257            0 :         pfree(working_llr);
    1258              :     }
    1259            0 :     if (iterations_used) {
    1260            0 :         *iterations_used = max_iterations;
    1261              :     }
    1262              : 
    1263            0 :     return false;
    1264              : }
    1265              : 
    1266            6 : extern bool poporon_ldpc_has_interleaver(const poporon_ldpc_t *ldpc)
    1267              : {
    1268            6 :     if (!ldpc) {
    1269            0 :         return false;
    1270              :     }
    1271            6 :     return ldpc->config.use_inner_interleave && ldpc->interleaver.forward != NULL;
    1272              : }
    1273              : 
    1274           13 : extern bool poporon_ldpc_interleave(const poporon_ldpc_t *ldpc, const uint8_t *input, uint8_t *output)
    1275              : {
    1276           13 :     if (!ldpc || !input || !output) {
    1277            0 :         return false;
    1278              :     }
    1279              : 
    1280           13 :     if (!ldpc->interleaver.forward) {
    1281            1 :         pmemcpy(output, input, ldpc->codeword_bytes);
    1282            1 :         return true;
    1283              :     }
    1284              : 
    1285           12 :     interleave_bits(ldpc, input, output);
    1286              : 
    1287           12 :     return true;
    1288              : }
    1289              : 
    1290            1 : extern bool poporon_ldpc_deinterleave(const poporon_ldpc_t *ldpc, const uint8_t *input, uint8_t *output)
    1291              : {
    1292            1 :     if (!ldpc || !input || !output) {
    1293            0 :         return false;
    1294              :     }
    1295              : 
    1296            1 :     if (!ldpc->interleaver.inverse) {
    1297            0 :         pmemcpy(output, input, ldpc->codeword_bytes);
    1298            0 :         return true;
    1299              :     }
    1300              : 
    1301            1 :     deinterleave_bits(ldpc, input, output);
    1302              : 
    1303            1 :     return true;
    1304              : }
        

Generated by: LCOV version 2.0-1