Commit 44c6729a authored by Alexis Baudin's avatar Alexis Baudin
Browse files

cpm cpmz

parents
*.o
\ No newline at end of file
# Build settings: compiler and optimisation flags shared by every target.
CC=gcc
CFLAGS=-O9
# Default goal: build the three programs.
all: cpm.o cpmz.o kClist-stdout.o
# Pattern rule: each X.o is produced directly from X.c. No -c flag is passed, so the
# output is a linked executable despite the .o suffix (the README runs ./kClist-stdout.o).
# NOTE(review): make requires recipe lines to start with a TAB; the leading whitespace
# appears to be missing in this view -- confirm against the real Makefile.
%.o: %.c
$(CC) $< -o $@ $(CFLAGS)
# Remove the built programs.
clean:
rm -f *.o
# CPM
## Compilation
> make
In the following, the programs are run with:
- $k \geq 3$
- edgelist.txt: one edge per line, given as two node IDs separated by a space
## $k$-clique streaming
To generate the stream of $k$-cliques from a graph, execute:
> ./kClist-stdout.o k edgelist.txt
## Exact CPM
The program takes as input a stream of $(k-1)$-cliques followed by a stream of $k$-cliques, separated by a line starting with the character 'X'. It computes the $k$-clique communities by percolation.
To count the $k$-clique communities :
> bash cpm.sh k edgelist.txt
To list the communities in the standard output :
> bash cpm.sh k edgelist.txt --write-commus
## CPMZ
The program takes as input a stream of $(k-1)$-cliques followed by a stream of $k$-cliques, separated by the character 'X'. It computes a memory-efficient approximate solution, CPMZ.
To count the relaxed $k$-clique communities :
> bash cpmz.sh z k edgelist.txt
To list these communities in the standard output :
> bash cpmz.sh z k edgelist.txt --write-commus
With $2 \leq z \leq k-1$
##
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <locale.h>
#include <ctype.h> // for function isspace
typedef unsigned long int Node;
typedef unsigned long int Edge;
typedef unsigned long long int Clique;
typedef unsigned char Kvalue;
#define S1MAX 100000000
#define S2MAX 10000 // maximum size of sets of type 2, will automatically increase if needed
////////////////////////
// LIST DATASTRUCTURE //
////////////////////////
// Growable array of Node values. alloc_ListNode creates it empty with a given
// capacity and add_to_ListNode appends to it, doubling the capacity when full.
typedef struct {
Clique n; // number of elements currently stored in tab
Clique nmax; // allocated capacity of tab, in elements
Node *tab; // heap buffer holding the elements
} ListNode;
// Allocate an empty ListNode able to hold n elements before its first growth.
// A request for 0 elements is rounded up to 1, because the list grows by
// doubling its capacity and 0 would never grow.
// Terminates the program with an error message if memory cannot be obtained.
// The caller owns the returned list and its tab buffer.
ListNode* alloc_ListNode( Clique n ) {
    ListNode *l = malloc( sizeof *l );
    if ( l == NULL ) {
        fprintf( stderr, "alloc_ListNode: out of memory\n" );
        exit( 1 );
    }
    l->n = 0;
    l->nmax = ( n > 0 ) ? n : 1;
    l->tab = malloc( l->nmax * sizeof *l->tab );
    if ( l->tab == NULL ) {
        fprintf( stderr, "alloc_ListNode: out of memory\n" );
        exit( 1 );
    }
    return l;
}
// Append elt at the end of l, doubling the capacity of l->tab when it is full.
// Declared static: a plain `inline` definition provides no external definition
// in C99 and later, so the program fails to link whenever the call is not inlined.
// Terminates the program with an error message if the buffer cannot grow.
static inline void add_to_ListNode( Clique elt, ListNode* l ) {
    if ( l->n == l->nmax ) {
        // A capacity of 0 cannot grow by doubling, so start from 1 in that case.
        Clique grown = ( l->nmax > 0 ) ? 2 * l->nmax : 1;
        // Keep the old pointer until realloc is known to have succeeded.
        Node *tmp = realloc( l->tab, grown * sizeof *tmp );
        if ( tmp == NULL ) {
            fprintf( stderr, "add_to_ListNode: out of memory\n" );
            exit( 1 );
        }
        l->tab = tmp;
        l->nmax = grown;
    }
    l->tab[l->n++] = elt;
}
///////////////////////
// SET DATASTRUCTURE //
///////////////////////
// Set of Clique-typed values stored twice: as an insertion-ordered list (for
// iteration and clearing) and as a table indexed by value (for membership tests).
typedef struct {
Clique nl; // number of distinct elements currently in list
Clique nlmax; // allocated capacity of list, in elements
Clique* list; // the distinct elements, in insertion order
Clique ntmax; // allocated size of tab; values >= ntmax are not in the set
Kvalue* tab; // tab[i] != 0 iff i is in list (add2set increments it on every insertion of i)
} Set ;
// Allocate an empty Set whose list and membership table both start with
// init_size slots. A non-positive init_size is treated as 1 so that the
// buffers are never zero-sized or sized from a negative number.
// Terminates the program with an error message if memory cannot be obtained.
Set* allocset(int init_size) {
    size_t slots = ( init_size > 0 ) ? (size_t) init_size : 1;
    Set *s = malloc( sizeof *s );
    if ( s == NULL ) {
        fprintf( stderr, "allocset: out of memory\n" );
        exit( 1 );
    }
    s->nl = 0;
    s->nlmax = slots;
    s->list = malloc( slots * sizeof *s->list );
    s->ntmax = slots;
    // The table must start zeroed: a zero entry means "not in the set".
    s->tab = calloc( slots, sizeof *s->tab );
    if ( s->list == NULL || s->tab == NULL ) {
        fprintf( stderr, "allocset: out of memory\n" );
        exit( 1 );
    }
    return s;
}
// Membership test: true iff p lies inside the table and its entry is non-zero.
// Values beyond the current table size are never members.
static inline bool isinset( Clique p, Set* s ) {
    return ( p < s->ntmax ) ? ( s->tab[p] != 0 ) : false;
}
// Insert p into s. The membership table is enlarged to p + S2MAX entries when
// p lies beyond it, and p is appended to the list the first time it is seen.
// tab[p] counts insertions but saturates at the largest Kvalue: letting it wrap
// back to 0 would make a later insertion append p to the list a second time.
// Terminates the program with an error message if a buffer cannot grow.
static inline void add2set( Clique p, Set* s ) {
    if ( p >= s->ntmax ) {
        Clique grown = p + S2MAX;
        Kvalue *tab = realloc( s->tab, grown * sizeof *tab );
        if ( tab == NULL ) {
            fprintf( stderr, "add2set: out of memory\n" );
            exit( 1 );
        }
        // Newly exposed entries must read as "absent".
        memset( tab + s->ntmax, 0, ( grown - s->ntmax ) * sizeof *tab );
        s->tab = tab;
        s->ntmax = grown;
    }
    if ( s->tab[p] == 0 ) {
        if ( s->nl >= s->nlmax ) {
            Clique cap = ( s->nlmax > 0 ) ? 2 * s->nlmax : 1;
            Clique *list = realloc( s->list, cap * sizeof *list );
            if ( list == NULL ) {
                fprintf( stderr, "add2set: out of memory\n" );
                exit( 1 );
            }
            s->list = list;
            s->nlmax = cap;
        }
        s->list[s->nl++] = p;
    }
    if ( s->tab[p] != (Kvalue) -1 ) {
        s->tab[p]++;
    }
}
// Empty the set. Only the table entries of the listed elements are reset, so
// the cost depends on the number of elements rather than on the table size.
static inline void clearset( Set* s ) {
    Clique remaining = s->nl;
    while ( remaining > 0 ) {
        remaining--;
        s->tab[s->list[remaining]] = 0;
    }
    s->nl = 0;
}
////////////////
// HASH TABLE //
////////////////
// Hash the k nodes of cknode into [0, tablesize) as a linear combination with
// random coefficients, reduced modulo tablesize.
// The coefficients are drawn once, on the first call, from [1, tablesize-1]
// (seeded from the clock) and then reused for every later call. Later calls
// must therefore not pass a larger k than the first one.
// Returns 0 when tablesize is 0, where the original divided by zero.
static inline Clique hash( Node *cknode, Clique tablesize, Kvalue k ) {
    static unsigned *a = NULL;
    Clique value = 0;
    Kvalue i;
    if ( a == NULL ) {
        // With tablesize <= 1 the range tablesize-1 is 0 (or wraps), and
        // `% range` would be a division by zero; every coefficient is 1 then.
        Clique range = ( tablesize > 1 ) ? tablesize - 1 : 1;
        srand( time(0) );
        // Ask for at least one slot so that a NULL result always means failure.
        a = malloc( ( k > 0 ? k : 1 ) * sizeof *a );
        if ( a == NULL ) {
            fprintf( stderr, "hash: out of memory\n" );
            exit( 1 );
        }
        for ( i = 0; i < k; i++ ) {
            a[i] = (unsigned) ( (Clique) rand() % range ) + 1;
        }
    }
    if ( tablesize == 0 ) {
        return 0;
    }
    for ( i = 0; i < k; i++ ) {
        value += a[i] * cknode[i];
    }
    return value % tablesize;
}
/////////////////////////////
// UNIONFIND DATASTRUCTURE //
/////////////////////////////
// Union-find forest over the (k-1)-cliques, together with the hash table that
// maps a (k-1)-clique to its index. Built by makeUnionFind.
typedef struct {
Clique n; // number of (k-1)-cliques
Kvalue k; // value of k-1, i.e. number of nodes per stored clique
ListNode* clique_list; // storing the (k-1)-cliques, k consecutive nodes each
Clique tablesize; // size of hash table (set to n by makeUnionFind)
Node *hashcount; // hashcount[h] = number of (k-1)-cliques having h as hash value; makeUnionFind frees this array before returning
Clique *hashid; // hashid[h] = index in hashtable of the first slot of row h (written only for non-empty rows)
Node **hashtable; // hash table containing pointers to (k-1)-cliques inside clique_list->tab
Clique *p; // parents, indexed by hashtable slot; -1 (all bits set) marks a clique not yet created
unsigned char *r; // ranks, increased only by Union
} UnionFind;
// Build the union-find structure and the hash table for n cliques of k nodes
// each, stored back to back in clique_list->tab.
//   n           number of cliques
//   k           number of nodes per clique
//   clique_list the cliques; must hold at least n*k nodes and must outlive the
//               returned structure (the hash table points into its buffer)
//   cknode      scratch buffer of at least k nodes, overwritten
// Every clique starts "not created" (parent == -1) with rank 0.
// The hash table is a single array of n slots grouped by hash value:
// hashid[h] is the first slot of row h and the cliques hashing to h occupy
// consecutive slots from there.
// Terminates the program with an error message on allocation failure or
// inconsistent input.
UnionFind* makeUnionFind( Clique n, Kvalue k, ListNode* clique_list, Node* cknode ) {
    Clique i, h, idh;
    Kvalue a;

    // Each clique is read as k consecutive nodes; refuse to read past the list.
    if ( clique_list->n < n * k ) {
        fprintf( stderr, "makeUnionFind: clique list holds fewer than n*k nodes\n" );
        exit( 1 );
    }

    UnionFind *uf = malloc( sizeof *uf );
    if ( uf == NULL ) {
        fprintf( stderr, "makeUnionFind: out of memory\n" );
        exit( 1 );
    }
    uf->n = n;
    uf->k = k;
    uf->clique_list = clique_list;
    uf->tablesize = n;
    uf->p = malloc( n * sizeof *uf->p );
    uf->r = calloc( n, sizeof *uf->r );
    uf->hashcount = calloc( n, sizeof *uf->hashcount );
    uf->hashid = calloc( n, sizeof *uf->hashid );
    uf->hashtable = calloc( n, sizeof *uf->hashtable );
    // A zero-sized request may legitimately return NULL, hence the n > 0 guard.
    if ( n > 0 && ( uf->p == NULL || uf->r == NULL || uf->hashcount == NULL
                    || uf->hashid == NULL || uf->hashtable == NULL ) ) {
        fprintf( stderr, "makeUnionFind: out of memory\n" );
        exit( 1 );
    }
    for ( i = 0; i < n; i++ ) {
        uf->p[i] = (Clique) -1; // not created yet
    }

    // Pass 1: count the cliques falling into each hash row.
    for ( i = 0; i < n; i++ ) {
        for ( a = 0; a < k; a++ ) {
            cknode[a] = clique_list->tab[i * k + a];
        }
        h = hash( cknode, uf->tablesize, k );
        uf->hashcount[h]++;
    }

    // Pass 2: turn the counts into the first slot of each row. Empty rows get
    // a defined value too (the start of the next row) instead of garbage.
    idh = 0;
    for ( h = 0; h < uf->tablesize; h++ ) {
        uf->hashid[h] = idh;
        idh += uf->hashcount[h];
    }
    // The counts are not needed any more; clear the pointer so that it does
    // not dangle inside the returned structure.
    free( uf->hashcount );
    uf->hashcount = NULL;

    if ( idh != uf->n ) {
        printf( "idh must be equal to the number of z-cliques\n" );
        printf( "idh = %llu\n", idh );
        exit(1);
    }

    // Pass 3: place each clique in the first free slot of its row.
    for ( i = 0; i < n; i++ ) {
        for ( a = 0; a < k; a++ ) {
            cknode[a] = clique_list->tab[i * k + a];
        }
        h = hash( cknode, uf->tablesize, k );
        idh = uf->hashid[h];
        while ( uf->hashtable[idh] != NULL ) {
            idh += 1;
        }
        uf->hashtable[idh] = clique_list->tab + i * k; // pointer to (k-1)-clique
    }
    return uf;
}
// Return true iff the first k nodes of c1 and c2 are pairwise equal.
// The comparison is positional, so both cliques must list their nodes in the
// same order. Declared static: a plain `inline` definition provides no external
// definition in C99 and later, which breaks linking when the call is not inlined.
static inline bool isequal( const Node *c1, const Node *c2, Kvalue k ) {
    Kvalue u;
    for ( u = 0; u < k; u++ ) {
        if ( c1[u] != c2[u] ) {
            return false;
        }
    }
    return true;
}
// Return the hash-table slot (which is also the union-find index) of the
// clique ck, made of uf->k nodes.
// The search starts at the first slot of the clique's hash row and walks
// forward. It is bounded by the table size: a clique that was never stored
// (malformed input) ends the program with an error message instead of reading
// past the end of the table.
static inline Clique cliqueid( Node *ck, UnionFind *uf ) {
    if ( uf->n > 0 ) {
        Clique h = hash( ck, uf->tablesize, uf->k );
        Clique idh;
        for ( idh = uf->hashid[h]; idh < uf->n; idh++ ) {
            if ( isequal( ck, uf->hashtable[idh], uf->k ) ) {
                return idh;
            }
        }
    }
    fprintf( stderr, "cliqueid: clique not found among the stored (k-1)-cliques\n" );
    exit( 1 );
}
// Return the representative of the tree containing x, with full path
// compression: every element on the path from x to the root ends up pointing
// directly at the root.
Clique Find( Clique x, UnionFind *uf ) {
    // First walk: locate the root.
    Clique root = x;
    while ( uf->p[root] != root ) {
        root = uf->p[root];
    }
    // Second walk: re-parent every element of the path onto the root.
    while ( uf->p[x] != root ) {
        Clique above = uf->p[x];
        uf->p[x] = root;
        x = above;
    }
    return root;
}
// Return the representative of element i, first creating i as a singleton
// tree if it has not been created yet (parent still equal to the -1 marker).
// Declared static: a plain `inline` definition provides no external definition
// in C99 and later, which breaks linking when the call is not inlined.
static inline Clique FindOrCreate( Clique i, UnionFind *uf ) {
    // The marker is compared as an explicit Clique value rather than relying
    // on the implicit conversion of a signed -1.
    if ( uf->p[i] == (Clique) -1 ) {
        uf->p[i] = i;
        return i;
    }
    return Find( i, uf );
}
// Merge the trees rooted at xr and yr (union by rank) and return the root of
// the merged tree. xr may be the -1 marker meaning "no tree yet", in which case
// yr is returned unchanged; yr must be a valid root.
// Declared static: a plain `inline` definition provides no external definition
// in C99 and later, which breaks linking when the call is not inlined.
static inline Clique Union( Clique xr, Clique yr, UnionFind *uf ) {
    if ( xr == yr || xr == (Clique) -1 ) {
        return yr;
    }
    if ( uf->r[xr] < uf->r[yr] ) {
        uf->p[xr] = yr;
        return yr;
    }
    // xr's rank is at least yr's: yr goes under xr, and xr's rank grows only
    // when the two ranks were equal.
    if ( uf->r[xr] == uf->r[yr] ) {
        uf->r[xr] = uf->r[xr] + 1;
    }
    uf->p[yr] = xr;
    return xr;
}
///////////////////////
// Build communities //
///////////////////////
// Process one k-clique: each of its k sub-cliques obtained by leaving one node
// out is looked up (and created in the union-find if needed), and all of them
// are merged into a single tree.
//   k        number of nodes in cknode
//   cknode   the k-clique
//   cknode2  scratch buffer for one (k-1)-clique, overwritten
static inline void mkcoms( Kvalue k, Node* cknode, Node* cknode2, UnionFind* uf ) {
    Clique merged = -1; // no tree yet
    Kvalue skip, pos, out;
    for ( skip = 0; skip < k; skip++ ) {
        // Copy every node except the one at index `skip`, keeping the order.
        out = 0;
        for ( pos = 0; pos < k; pos++ ) {
            if ( pos == skip ) {
                continue;
            }
            cknode2[out] = cknode[pos];
            out++;
        }
        Clique slot = cliqueid( cknode2, uf );
        Clique root = FindOrCreate( slot, uf );
        merged = Union( merged, root, uf );
    }
}
// Parse the whitespace-separated unsigned integers of `string` into cknode,
// in order. Numbers are read with base auto-detection (base 0), as before.
// Parsing stops at the end of the string or at the first token that is not a
// number; the original looped forever on such a token, writing past the end
// of cknode.
// The caller must provide a cknode large enough for every number on the line:
// the function has no way to know the capacity.
// Declared static: a plain `inline` definition provides no external definition
// in C99 and later, which breaks linking when the call is not inlined.
static inline void clique_from_line( Node *cknode, char *string ) {
    char *tail;
    int i = 0;
    for ( ;; ) {
        // Skip whitespace by hand, to detect the end. The cast keeps isspace
        // defined for characters with a negative char value.
        while ( isspace( (unsigned char) *string ) ) {
            string++;
        }
        if ( *string == '\0' ) {
            return;
        }
        // Node is unsigned, so parse with strtoul rather than strtol.
        Node next = strtoul( string, &tail, 0 );
        if ( tail == string ) {
            return; // no digits consumed: not a number, stop here
        }
        cknode[i++] = next;
        string = tail;
    }
}
// Parse the whitespace-separated unsigned integers of `string` and append each
// of them to clique_list. Numbers are read with base auto-detection (base 0),
// as before.
// Parsing stops at the end of the string or at the first token that is not a
// number; the original looped forever on such a token, growing the list
// without bound.
// Declared static: a plain `inline` definition provides no external definition
// in C99 and later, which breaks linking when the call is not inlined.
static inline void addListNode_from_line( ListNode* clique_list, char *string ) {
    char *tail;
    for ( ;; ) {
        // The cast keeps isspace defined for characters with a negative char value.
        while ( isspace( (unsigned char) *string ) ) {
            string++;
        }
        if ( *string == '\0' ) {
            return;
        }
        // Node is unsigned, so parse with strtoul rather than strtol.
        Node next = strtoul( string, &tail, 0 );
        if ( tail == string ) {
            return; // no digits consumed: not a number, stop here
        }
        add_to_ListNode( next, clique_list );
        string = tail;
    }
}
int main( int argc, char** argv ) {
Clique i;
Kvalue k = atoi( argv[1] );
Node *cknode = malloc( k * sizeof(Node) );
Node *cknode2 = malloc( k * sizeof(Node) );
UnionFind* uf;
Clique n;
bool write_commus = false;
char *output;
if ( argc > 2 ) {
write_commus = strcmp( argv[2], "--write-commus" ) == 0;
}
ListNode* clique_list = alloc_ListNode( S1MAX );
// Reading STDIN
char * line = NULL;
size_t len = 0;
ssize_t read;
char * data;
if ( ! write_commus ) {
printf( "k = %d\n", k );
}
// Store (k-1)-cliques
n = 0;
while ( (read = getline(&line, &len, stdin)) != -1 ) {
if ( line[0] == 'X' ) {
// (k-1)-clique stream is done
break;
}
addListNode_from_line( clique_list, line );
n++;
}
if ( ! write_commus ) {
printf("Number of (k-1)-cliques = %lld\n", n );
}
// Create UnionFind
uf = makeUnionFind( n, k-1, clique_list, cknode );
// CPM algo
n = 0;
while ( (read = getline(&line, &len, stdin)) != -1 ) {
clique_from_line( cknode, line );
mkcoms( k, cknode, cknode2, uf );
n++;
}
if ( ! write_commus ) {
printf( "Number of k-cliques = %lld\n", n );
}
if ( ! write_commus ) {
n = 0;
for ( i = 0; i < uf->n; i++ ) {
if ( uf->p[i] == i ) {
n++;
}
}
printf( "Number of %u-clique communities = %llu\n", k, n );
}
else {
Set* commu_roots = allocset( S2MAX );
Clique max_root = 0;
for (i = 0; i < uf->n; i++) {
if (uf->p[i] == i){
if (i > max_root) {
max_root = i;
}
add2set(i, commu_roots);
}
}
n = commu_roots->nl;
Clique* commu_id = malloc((max_root+1) * sizeof(Clique));
for (i = 0; i < commu_roots->nl; i++) {
commu_id[commu_roots->list[i]] = i;
}
// build communities
Set** commu_nodes = malloc(n * sizeof(Set*));
for (i = 0; i < n; i++) {
commu_nodes[i] = allocset(k);
}
// Browse (k-1)-cliques
Clique c, id, p, q, comid, j, i;
Kvalue a;
for (c = 0; c < uf->tablesize; c++) {
cknode = uf->hashtable[c];
if (uf->p[c] != -1) {
p = Find(uf->p[c], uf);
comid = commu_id[p];
for (a = 0; a < k-1; a++) {
add2set(cknode[a], commu_nodes[comid]);
}
}
}
// write communities
for(i = 0; i < n; i++){
for (j = 0; j < commu_nodes[i]->nl; j++) {
printf ("%lld ", commu_nodes[i]->list[j]);
}
printf("\n");
}
}
return 0;
}
#!/bin/bash
# Exact CPM driver: streams the (k-1)-cliques, a separator line "X", then the
# k-cliques of the graph into cpm.o.
# Usage: bash cpm.sh k edgelist.txt [--write-commus]
k=$1
net=$2
writecommus=$3
# Arguments are quoted so that an edge-list path containing spaces is passed as
# one word; the optional flag is forwarded only when it was actually given.
(./kClist-stdout.o "$((k-1))" "$net" ; echo "X" ; ./kClist-stdout.o "$k" "$net") | ./cpm.o "$k" ${writecommus:+"$writecommus"}
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <ctype.h> // for function isspace
#define NLINKS 100000000 // maximum number of edges, will automatically increase if needed
#define S1MAX 2 // maximum size of sets of type 1, will automatically increase if needed
#define S2MAX 10000 // maximum size of sets of type 2, will automatically increase if needed
#define S3MAX 100000000 // size of clique set
typedef unsigned long int Node;
typedef unsigned long int Edge;//////////////////
typedef unsigned long long int Clique;
typedef unsigned short int Kvalue;
////////////////////////
// LIST DATASTRUCTURE //
////////////////////////
// Growable array of Node values. alloc_ListNode creates it empty with a given
// capacity and add_to_ListNode appends to it, doubling the capacity when full.
typedef struct {
Clique n; // number of elements currently stored in tab
Clique nmax; // allocated capacity of tab, in elements
Node *tab; // heap buffer holding the elements
} ListNode;
// Allocate an empty ListNode able to hold n elements before its first growth.
// A request for 0 elements is rounded up to 1, because the list grows by
// doubling its capacity and 0 would never grow.
// Terminates the program with an error message if memory cannot be obtained.
// The caller owns the returned list and its tab buffer.
ListNode* alloc_ListNode( Clique n ) {
    ListNode *l = malloc( sizeof *l );
    if ( l == NULL ) {
        fprintf( stderr, "alloc_ListNode: out of memory\n" );
        exit( 1 );
    }
    l->n = 0;
    l->nmax = ( n > 0 ) ? n : 1;
    l->tab = malloc( l->nmax * sizeof *l->tab );
    if ( l->tab == NULL ) {
        fprintf( stderr, "alloc_ListNode: out of memory\n" );
        exit( 1 );
    }
    return l;
}
// Append elt at the end of l, doubling the capacity of l->tab when it is full.
// Declared static: a plain `inline` definition provides no external definition
// in C99 and later, so the program fails to link whenever the call is not inlined.
// Terminates the program with an error message if the buffer cannot grow.
static inline void add_to_ListNode( Node elt, ListNode* l ) {
    if ( l->n == l->nmax ) {
        // A capacity of 0 cannot grow by doubling, so start from 1 in that case.
        Clique grown = ( l->nmax > 0 ) ? 2 * l->nmax : 1;
        // Keep the old pointer until realloc is known to have succeeded.
        Node *tmp = realloc( l->tab, grown * sizeof *tmp );
        if ( tmp == NULL ) {
            fprintf( stderr, "add_to_ListNode: out of memory\n" );
            exit( 1 );
        }
        l->tab = tmp;
        l->nmax = grown;
    }
    l->tab[l->n++] = elt;
}
////////////////
// HASH TABLE //
////////////////
// Hash the k nodes of cknode into [0, tablesize) as a linear combination with
// random coefficients, reduced modulo tablesize.
// The coefficients are drawn once, on the first call, from [1, tablesize-1]
// (seeded from the clock) and then reused for every later call. Later calls
// must therefore not pass a larger k than the first one.
// Returns 0 when tablesize is 0, where the original divided by zero.
static inline Clique hash( Node *cknode, Clique tablesize, Kvalue k ) {
    static unsigned *a = NULL;
    Clique value = 0;
    Kvalue i;
    if ( a == NULL ) {
        // With tablesize <= 1 the range tablesize-1 is 0 (or wraps), and
        // `% range` would be a division by zero; every coefficient is 1 then.
        Clique range = ( tablesize > 1 ) ? tablesize - 1 : 1;
        srand( time(0) );
        // Ask for at least one slot so that a NULL result always means failure.
        a = malloc( ( k > 0 ? k : 1 ) * sizeof *a );
        if ( a == NULL ) {
            fprintf( stderr, "hash: out of memory\n" );
            exit( 1 );
        }
        for ( i = 0; i < k; i++ ) {
            a[i] = (unsigned) ( (Clique) rand() % range ) + 1;
        }
    }
    if ( tablesize == 0 ) {
        return 0;
    }
    for ( i = 0; i < k; i++ ) {
        value += a[i] * cknode[i];
    }
    return value % tablesize;
}
///////////////////////
// SET DATASTRUCTURE //
///////////////////////
// Set of Clique-typed values stored twice: as a list of its elements and as a
// table indexed by value, which isinset uses for membership tests.
typedef struct {
Clique nl; // number of elements in set
Clique nlmax; // allocated capacity of list, in elements
Clique* list; // elements in set
Clique ntmax; // allocated size of tab; isinset reports values >= ntmax as absent
Kvalue* tab; // tab[i] != 0 iff i is in list
} Set ;
// Allocate an empty Set whose list and membership table both start with
// S2MAX slots.
// Terminates the program with an error message if memory cannot be obtained.
Set* allocset() {
    Set *s = malloc( sizeof *s );
    if ( s == NULL ) {
        fprintf( stderr, "allocset: out of memory\n" );
        exit( 1 );
    }
    s->nl = 0;
    s->nlmax = S2MAX;
    s->list = malloc( S2MAX * sizeof *s->list );
    s->ntmax = S2MAX;
    // The table must start zeroed: a zero entry means "not in the set".
    s->tab = calloc( S2MAX, sizeof *s->tab );
    if ( s->list == NULL || s->tab == NULL ) {
        fprintf( stderr, "allocset: out of memory\n" );
        exit( 1 );
    }
    return s;
}
// Membership test: true iff p lies inside the table and its entry is non-zero.
// Values beyond the current table size are never members.
static inline bool isinset( Clique p, Set* s ) {
    bool inside = p < s->ntmax;
    return inside && s->tab[p] != 0;
}
static inline void add2set( Clique p, Set* s ) {
if ( p >= s->ntmax ) {
s->tab = realloc( s->tab, ( p + S2MAX ) * sizeof( Kvalue ) );
bzero( s->tab + s->ntmax, ( p + S2MAX - s->ntmax ) * sizeof( Kvalue ) );
s->ntmax