### Merge branch 'f4sat' into 'master'

```prepares elimination block order implementation

See merge request eder/msolve!19```
parents 5a6832f9 6f04d966
 ... ... @@ -233,13 +233,15 @@ static inline int is_equal_exponent_dm(bs_t *bs, ht_t *ht, long idx, long pos, const bl_t bi = bs->lmps[idx]; hm_t *dt = bs->hm[bi] + OFFSET; for(long k = 0; k < nv - 1; k++){ int e = (int32_t)ht->ev[dt[pos]][k]; for(long k = 0; k < nv-1 ; k++){ /* in hash table exponent vectors store their degree in the first entry, * thus we have to go by "k+1" */ int e = (int32_t)ht->ev[dt[pos]][k+1]; if(e != exp2[k]){ return 0; } } int e = (int32_t)ht->ev[dt[pos]][nv - 1]; int e = (int32_t)ht->ev[dt[pos]][nv]; return (e == exp2[nv - 1]); } ... ...
 ... ... @@ -211,15 +211,15 @@ static void print_msolve_polynomials_ff_32( len = bs->hm[idx][LENGTH]; cf = bs->cf_32[bs->hm[idx][COEFFS]]; ctr = 0; k = 0; while (ctr == 0 && k < nv) { k = 1; while (ctr == 0 && k <= nv) { if (ht->ev[hm][k] > 0) { fprintf(file, "%s^%u",vnames[k], ht->ev[hm][k]); ctr++; } k++; } for (;k < nv; ++k) { for (;k <= nv; ++k) { if (ht->ev[hm][k] > 0) { fprintf(file, "*%s^%u",vnames[k], ht->ev[hm][k]); } ... ... @@ -241,14 +241,14 @@ static void print_msolve_polynomials_ff_32( len = bs->hm[idx][LENGTH]; cf = bs->cf_32[bs->hm[idx][COEFFS]]; fprintf(file, "%u", cf); for (k = 0; k < nv; ++k) { for (k = 1; k <= nv; ++k) { if (ht->ev[hm][k] > 0) { fprintf(file, "*%s^%u",vnames[k], ht->ev[hm][k]); } } for (j = 1; j < len; ++j) { fprintf(file, "+%u", cf[j]); for (k = 0; k < nv; ++k) { for (k = 1; k <= nv; ++k) { if (ht->ev[hm[j]][k] > 0) { fprintf(file, "*%s^%u",vnames[k], ht->ev[hm[j]][k]); } ... ...
 ... ... @@ -180,6 +180,7 @@ static void getoptions( int32_t *reduce_gb, int32_t *print_gb, int32_t *genericity_handling, int32_t *saturate, int32_t *normal_form, int32_t *normal_form_matrix, int32_t *is_gb, ... ... @@ -192,7 +193,7 @@ static void getoptions( char *filename = NULL; char *out_fname = NULL; opterr = 1; char options[] = "hf:v:l:t:o:u:i:p:P:g:c:s:r:m:M:n:"; char options[] = "hf:v:l:t:o:u:i:p:P:g:c:s:S:r:m:M:n:"; while((opt = getopt(argc, argv, options)) != -1) { switch(opt) { case 'h': ... ... @@ -222,6 +223,15 @@ static void getoptions( case 's': *initial_hts = strtol(optarg, NULL, 10); break; case 'S': *saturate = strtol(optarg, NULL, 10); if (*saturate < 0) { *saturate = 0; } if (*saturate > 1) { *saturate = 1; } break; case 'r': *reduce_gb = strtol(optarg, NULL, 10); break; ... ... @@ -322,6 +332,7 @@ int main(int argc, char **argv){ int32_t reduce_gb = 1; int32_t print_gb = 0; int32_t genericity_handling = 2; int32_t saturate = 0; int32_t normal_form = 0; int32_t normal_form_matrix = 0; int32_t is_gb = 0; ... ... @@ -333,7 +344,7 @@ int main(int argc, char **argv){ files->out_file = NULL; getoptions(argc, argv, &initial_hts, &nr_threads, &max_pairs, &la_option, &update_ht, &reduce_gb, &print_gb, &genericity_handling, &normal_form, &normal_form_matrix, &genericity_handling, &saturate, &normal_form, &normal_form_matrix, &is_gb, &get_param, &precision, &generate_pbm, &info_level, files); ... ... @@ -367,7 +378,7 @@ int main(int argc, char **argv){ /* main msolve functionality */ int ret = core_msolve(la_option, nr_threads, info_level, initial_hts, max_pairs, update_ht, generate_pbm, reduce_gb, print_gb, get_param, genericity_handling, normal_form, normal_form_matrix, is_gb, genericity_handling, saturate, normal_form, normal_form_matrix, is_gb, precision, files, gens, ¶m, &mpz_param, &nb_real_roots, &real_roots, &real_pts); ... ...
This diff is collapsed.
 ... ... @@ -142,6 +142,7 @@ int core_msolve( int32_t print_gb, int32_t get_param, int32_t genericity_handling, int32_t saturate, int32_t normal_form, int32_t normal_form_matrix, int32_t is_gb, ... ...
 ... ... @@ -12,6 +12,7 @@ EXTRA_DIST = basis.h \ io.h \ modular.h \ nf.h \ f4sat.h \ sort_r.h \ stat.h \ tools.h \ ... ...
 ... ... @@ -29,19 +29,25 @@ static void free_basis_elements( if (bs->cf_8) { for (i = 0; i < bs->ld; ++i) { free(bs->cf_8[i]); bs->cf_8[i] = NULL; free(bs->hm[i]); bs->hm[i] = NULL; } } if (bs->cf_16) { for (i = 0; i < bs->ld; ++i) { free(bs->cf_16[i]); bs->cf_16[i] = NULL; free(bs->hm[i]); bs->hm[i] = NULL; } } if (bs->cf_32) { for (i = 0; i < bs->ld; ++i) { free(bs->cf_32[i]); bs->cf_32[i] = NULL; free(bs->hm[i]); bs->hm[i] = NULL; } } if (bs->cf_qq) { ... ... @@ -52,10 +58,12 @@ static void free_basis_elements( mpz_clear(coeffs[j]); } free(bs->cf_qq[bs->hm[i][COEFFS]]); bs->cf_qq[bs->hm[i][COEFFS]] = NULL; free(bs->hm[i]); bs->hm[i] = NULL; } } bs->ld = 0; bs->ld = bs->lo = 0; } void free_basis( ... ... @@ -153,9 +161,13 @@ static inline void check_enlarge_basis_ff_8( bs->sz = bs->sz * 2 > bs->ld + added ? bs->sz * 2 : bs->ld + added; bs->cf_8 = realloc(bs->cf_8, (unsigned long)bs->sz * sizeof(cf8_t *)); memset(bs->cf_8+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(cf8_t *)); bs->hm = realloc(bs->hm, (unsigned long)bs->sz * sizeof(hm_t *)); memset(bs->hm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(hm_t *)); bs->lm = realloc(bs->lm, (unsigned long)bs->sz * sizeof(sdm_t)); memset(bs->lm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(sdm_t)); bs->lmps = realloc(bs->lmps, (unsigned long)bs->sz * sizeof(bl_t)); memset(bs->lmps+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(bl_t)); bs->red = realloc(bs->red, (unsigned long)bs->sz * sizeof(int8_t)); memset(bs->red+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(int8_t)); ... ... @@ -238,9 +250,13 @@ static inline void check_enlarge_basis_ff_16( bs->sz = bs->sz * 2 > bs->ld + added ? bs->sz * 2 : bs->ld + added; bs->cf_16 = realloc(bs->cf_16, (unsigned long)bs->sz * sizeof(cf16_t *)); memset(bs->cf_16+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(cf16_t *)); bs->hm = realloc(bs->hm, (unsigned long)bs->sz * sizeof(hm_t *)); memset(bs->hm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(hm_t *)); bs->lm = realloc(bs->lm, (unsigned long)bs->sz * sizeof(sdm_t)); memset(bs->lm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(sdm_t)); bs->lmps = realloc(bs->lmps, (unsigned long)bs->sz * sizeof(bl_t)); memset(bs->lmps+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(bl_t)); bs->red = realloc(bs->red, (unsigned long)bs->sz * sizeof(int8_t)); memset(bs->red+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(int8_t)); ... ... @@ -304,11 +320,11 @@ static bs_t *initialize_basis_ff_32( bs->cf_8 = NULL; bs->cf_16 = NULL; bs->cf_32 = (cf32_t **)malloc((unsigned long)bs->sz * sizeof(cf32_t *)); bs->cf_32 = (cf32_t **)calloc((unsigned long)bs->sz, sizeof(cf32_t *)); bs->cf_qq = NULL; bs->hm = (hm_t **)malloc((unsigned long)bs->sz * sizeof(hm_t *)); bs->lm = (sdm_t *)malloc((unsigned long)bs->sz * sizeof(sdm_t)); bs->lmps = (bl_t *)malloc((unsigned long)bs->sz * sizeof(bl_t)); bs->hm = (hm_t **)calloc((unsigned long)bs->sz, sizeof(hm_t *)); bs->lm = (sdm_t *)calloc((unsigned long)bs->sz, sizeof(sdm_t)); bs->lmps = (bl_t *)calloc((unsigned long)bs->sz, sizeof(bl_t)); bs->red = (int8_t *)calloc((unsigned long)bs->sz, sizeof(int8_t)); return bs; ... ... @@ -323,9 +339,13 @@ static inline void check_enlarge_basis_ff_32( bs->sz = bs->sz * 2 > bs->ld + added ? bs->sz * 2 : bs->ld + added; bs->cf_32 = realloc(bs->cf_32, (unsigned long)bs->sz * sizeof(cf32_t *)); memset(bs->cf_32+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(cf32_t *)); bs->hm = realloc(bs->hm, (unsigned long)bs->sz * sizeof(hm_t *)); memset(bs->hm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(hm_t *)); bs->lm = realloc(bs->lm, (unsigned long)bs->sz * sizeof(sdm_t)); memset(bs->lm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(sdm_t)); bs->lmps = realloc(bs->lmps, (unsigned long)bs->sz * sizeof(bl_t)); memset(bs->lmps+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(bl_t)); bs->red = realloc(bs->red, (unsigned long)bs->sz * sizeof(int8_t)); memset(bs->red+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(int8_t)); ... ... @@ -334,7 +354,7 @@ static inline void check_enlarge_basis_ff_32( static inline void normalize_initial_basis_ff_32( bs_t *bs, const uint32_t fc const uint32_t fc ) { len_t i, j; ... ... @@ -408,8 +428,11 @@ static inline void check_enlarge_basis_qq( bs->cf_qq = realloc(bs->cf_qq, (unsigned long)bs->sz * sizeof(mpz_t *)); bs->hm = realloc(bs->hm, (unsigned long)bs->sz * sizeof(hm_t *)); memset(bs->hm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(hm_t *)); bs->lm = realloc(bs->lm, (unsigned long)bs->sz * sizeof(sdm_t)); memset(bs->lm+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(sdm_t)); bs->lmps = realloc(bs->lmps, (unsigned long)bs->sz * sizeof(bl_t)); memset(bs->lmps+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(bl_t)); bs->red = realloc(bs->red, (unsigned long)bs->sz * sizeof(int8_t)); memset(bs->red+bs->ld, 0, (unsigned long)(bs->sz-bs->ld) * sizeof(int8_t)); ... ...
 ... ... @@ -25,6 +25,187 @@ * hashes in the polynomials resp. rows. moreover, we have sorted each row * by pivots / non-pivots. thus we get already an A|B splicing of the * initial matrix. this is a first step for receiving a full GBLA matrix. */ static void convert_multipliers_to_columns( hi_t **hcmp, bs_t *sat, stat_t *st, ht_t *ht ) { hl_t i; hi_t *hcm = *hcmp; /* clear ht-ev */ memset(ht->ev, 0, (unsigned long)ht->nv * sizeof(exp_t)); /* timings */ double ct0, ct1, rt0, rt1; ct0 = cputime(); rt0 = realtime(); /* all elements in the sht hash table represent * exactly one column of the matrix */ hcm = realloc(hcm, (unsigned long)sat->ld * sizeof(hi_t)); for (i = 0; i < sat->ld; ++i) { hcm[i] = sat->hm[i][MULT]; } sort_r(hcm, (unsigned long)sat->ld, sizeof(hi_t), hcm_cmp, ht); /* printf("hcmm\n"); * for (int ii=0; iild; ++ii) { * printf("hcmm[%d] = %d | idx %u | ", ii, ht->hd[hcm[ii]].idx, hcm[ii]); * for (int jj = 0; jj < ht->nv; ++jj) { * printf("%d ", ht->ev[hcm[ii]][jj]); * } * printf("\n"); * } */ /* store the other direction (hash -> column) */ for (i = 0; i < sat->ld; ++i) { ht->hd[hcm[i]].idx = (hi_t)i; } /* map column positions to mul entries*/ for (i = 0; i < sat->ld; ++i) { sat->hm[i][MULT] = ht->hd[sat->hm[i][MULT]].idx; } /* timings */ ct1 = cputime(); rt1 = realtime(); st->convert_ctime += ct1 - ct0; st->convert_rtime += rt1 - rt0; *hcmp = hcm; } static void convert_hashes_to_columns_sat( hi_t **hcmp, mat_t *mat, bs_t *sat, stat_t *st, ht_t *sht ) { hl_t i; hi_t j, k; hm_t *row; int64_t nterms = 0; hi_t *hcm = *hcmp; /* timings */ double ct0, ct1, rt0, rt1; ct0 = cputime(); rt0 = realtime(); len_t hi; const len_t mnr = mat->nr; const hl_t esld = sht->eld; hd_t *hds = sht->hd; hm_t **rrows = mat->rr; /* all elements in the sht hash table represent * exactly one column of the matrix */ hcm = realloc(hcm, (esld-1) * sizeof(hi_t)); for (k = 0, j = 0, i = 1; i < esld; ++i) { hi = hds[i].idx; hcm[j++] = i; if (hi == 2) { k++; } } sort_r(hcm, (unsigned long)j, sizeof(hi_t), hcm_cmp, sht); /* printf("hcm\n"); * for (int ii=0; iiev[hcm[ii]][DEG]; * for (int jj = 0; jj < sht->nv; ++jj) { * printf("%d ", sht->ev[hcm[ii]][jj]); * } * printf("\n"); * } */ mat->ncl = k; mat->ncr = (len_t)esld - 1 - mat->ncl; st->num_rowsred += sat->ld; /* store the other direction (hash -> column) */ const hi_t ld = (hi_t)(esld - 1); for (k = 0; k < ld; ++k) { hds[hcm[k]].idx = (hi_t)k; } /* map column positions to reducer matrix */ #pragma omp parallel for num_threads(st->nthrds) private(k, j) for (k = 0; k < mat->nru; ++k) { const len_t os = rrows[k][PRELOOP]; const len_t len = rrows[k][LENGTH]; row = rrows[k] + OFFSET; for (j = 0; j < os; ++j) { row[j] = hds[row[j]].idx; } for (; j < len; j += UNROLL) { row[j] = hds[row[j]].idx; row[j+1] = hds[row[j+1]].idx; row[j+2] = hds[row[j+2]].idx; row[j+3] = hds[row[j+3]].idx; } } for (k = 0; k < mat->nru; ++k) { nterms += rrows[k][LENGTH]; } /* map column positions to saturation elements */ #pragma omp parallel for num_threads(st->nthrds) private(k, j) for (k = 0; k < sat->ld; ++k) { const len_t os = sat->hm[k][PRELOOP]; const len_t len = sat->hm[k][LENGTH]; row = sat->hm[k] + OFFSET; for (j = 0; j < os; ++j) { row[j] = hds[row[j]].idx; } for (; j < len; j += UNROLL) { row[j] = hds[row[j]].idx; row[j+1] = hds[row[j+1]].idx; row[j+2] = hds[row[j+2]].idx; row[j+3] = hds[row[j+3]].idx; } } for (k = 0; k < mat->nrl; ++k) { nterms += sat->hm[k][LENGTH]; } /* next we sort each row by the new colum order due * to known / unkown pivots */ /* NOTE: As strange as it may sound, we do not need to sort the rows. * When reducing, we copy them to dense rows, there we copy the coefficients * at the right place and reduce then. For the reducers itself it is not * important in which order the terms are represented as long as the first * term is the lead term, which is always true. Once a row is finally reduced * it is copied back to a sparse representation, now in the correct term * order since it is coming from the correctly sorted dense row. So all newly * added elements have all their terms sorted correctly w.r.t. the given * monomial order. */ /* compute density of matrix */ nterms *= 100; /* for percentage */ double density = (double)nterms / (double)mnr / (double)mat->nc; /* timings */ ct1 = cputime(); rt1 = realtime(); st->convert_ctime += ct1 - ct0; st->convert_rtime += rt1 - rt0; if (st->info_level > 1) { printf(" %7d x %-7d %8.2f%%", mat->nr + sat->ld, mat->nc, density); fflush(stdout); } *hcmp = hcm; } static void convert_hashes_to_columns( hi_t **hcmp, mat_t *mat, ... ... @@ -67,7 +248,7 @@ static void convert_hashes_to_columns( /* printf("hcm\n"); * for (int ii=0; iiev[hcm[ii]][DEG]); * for (int jj = 0; jj < sht->nv; ++jj) { * printf("%d ", sht->ev[hcm[ii]][jj]); * } ... ... @@ -104,7 +285,6 @@ static void convert_hashes_to_columns( for (k = 0; k < mat->nru; ++k) { nterms += rrows[k][LENGTH]; } #pragma omp parallel for num_threads(st->nthrds) private(k, j) for (k = 0; k < mat->nrl; ++k) { const len_t os = trows[k][PRELOOP]; ... ... @@ -153,6 +333,106 @@ static void convert_hashes_to_columns( *hcmp = hcm; } static void convert_columns_to_hashes( bs_t *bs, const hi_t * const hcm, const hi_t * const hcmm ) { len_t i, j; for (i = 0; i < bs->ld; ++i) { if (bs->hm[i] != NULL) { for (j = OFFSET; j < bs->hm[i][LENGTH]+OFFSET; ++j) { bs->hm[i][j] = hcm[bs->hm[i][j]]; } bs->hm[i][MULT] = hcmm[bs->hm[i][MULT]]; } } } static void add_kernel_elements_to_basis( bs_t *sat, bs_t *bs, bs_t *kernel, const ht_t * const ht, const hi_t * const hcm, stat_t *st ) { len_t *terms = (len_t *)calloc((unsigned long)sat->ld, sizeof(len_t)); len_t nterms = 0; len_t i, j, k; len_t ctr = 0; const len_t bld = bs->ld; /* timings */ double ct0, ct1, rt0, rt1; ct0 = cputime(); rt0 = realtime(); /* fix size of basis for entering new elements directly */ check_enlarge_basis(bs, kernel->ld); /* we need to sort the kernel elements first (in order to track * redundancy correctly) */ hm_t **rows = (hm_t **)calloc((unsigned long)kernel->ld, sizeof(hm_t *)); k = 0; for (i = 0; i < kernel->ld; ++i) { /* printf("kernel[%u] = %p\n", i, kernel->hm[i]); */ rows[k++] = kernel->hm[i]; kernel->hm[i] = NULL; } sort_matrix_rows_increasing(rows, k); /* only for 32 bit at the moment */ for (i = 0; i < kernel->ld; ++i) { bs->cf_32[bld+ctr] = kernel->cf_32[rows[i][COEFFS]]; kernel->cf_32[rows[i][COEFFS]] = NULL; bs->hm[bld+ctr] = rows[i]; bs->hm[bld+ctr][COEFFS] = bld+ctr; j = OFFSET; next_j: for (; j < bs->hm[bld+ctr][LENGTH]+OFFSET; ++j) { bs->hm[bld+ctr][j] = hcm[bs->hm[bld+ctr][j]]; if (nterms != 0) { for (int kk = 0; kk < nterms; ++kk) { if (terms[kk] == bs->hm[bld+ctr][j]) { j++; goto next_j; } } } terms[nterms] = bs->hm[bld+ctr][j]; nterms++; } if (ht->ev[bs->hm[bld+ctr][OFFSET]][DEG] == 0) { bs->constant = 1; } /* printf("new element from kernel (%u): length %u | ", bld+ctr, bs->hm[bld+ctr][LENGTH]); * for (int kk=0; kkhm[bld+ctr][LENGTH]; ++kk) { * printf("%u | ", bs->cf_32[bld+ctr][kk]); * printf("%u | ", ht->ev[bs->hm[bld+ctr][OFFSET+kk]][DEG]); * for (int jj=0; jj < ht->nv; ++jj) { * printf("%u ", ht->ev[bs->hm[bld+ctr][OFFSET+kk]][jj]); * } * printf(" || "); * } * printf("\n"); */ ctr++; } /* printf("%u of %u terms are used for kernel elements, %.2f\n", nterms, sat->ld, (float)nterms / (float)sat->ld); */ free(terms); free(rows); rows = NULL; /* timings */ ct1 = cputime(); rt1 = realtime(); st->convert_ctime += ct1 - ct0; st->convert_rtime += rt1 - rt0; } static void return_normal_forms_to_basis( mat_t *mat, bs_t *bs, ... ... @@ -227,7 +507,7 @@ static void convert_sparse_matrix_rows_to_basis_elements( case 0: for (i = 0; i < np; ++i) { insert_in_basis_hash_table_pivots(rows[i], bht, sht, hcm); if (bht->hd[rows[i][OFFSET]].deg == 0) { if (bht->ev[rows[i][OFFSET]][DEG] == 0) { bs->constant = 1; } bs->cf_qq[bl+i] = mat->cf_qq[rows[i][COEFFS]]; ... ... @@ -239,7 +519,7 @@ static void convert_sparse_matrix_rows_to_basis_elements( case 8: for (i = 0; i < np; ++i) { insert_in_basis_hash_table_pivots(rows[i], bht, sht, hcm); if (bht->hd[rows[i][OFFSET]].deg == 0) { if (bht->ev[rows[i][OFFSET]][DEG] == 0) { bs->constant = 1; } bs->cf_8[bl+i] = mat->cf_8[rows[i][COEFFS]]; ... ... @@ -250,7 +530,7 @@ static void convert_sparse_matrix_rows_to_basis_elements( case 16: for (i = 0; i < np; ++i) { insert_in_basis_hash_table_pivots(rows[i], bht, sht, hcm); if (bht->hd[rows[i][OFFSET]].deg == 0) { if (bht->ev[rows[i][OFFSET]][DEG] == 0) { bs->constant = 1; } bs->cf_16[bl+i] = mat->cf_16[rows[i][COEFFS]]; ... ... @@ -259,20 +539,51 @@ static void convert_sparse_matrix_rows_to_basis_elements( } break; case 32: /* int ii; * long power_t; * exp_t *e = bht->ev; */ for (i = 0; i < np; ++i) {