Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
vihgo
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gad-public
vihgo
Commits
d066f640
Commit
d066f640
authored
Sep 24, 2020
by
Yannis Duffourd
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Code reformatting for clang compliance
parent
37344ab2
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
211 additions
and
261 deletions
+211
-261
utils.cpp
cpp/utils.cpp
+211
-261
No files found.
cpp/utils.cpp
View file @
d066f640
// utility functions for bioinformatics
// utility functions for bioinformatics
#include <iostream>
#include <algorithm>
#include <string>
#include <cmath>
#include <cstring>
#include <fstream>
#include <fstream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <stdexcept>
#include <stdexcept>
#include <cstring>
#include <string>
#include <algorithm>
#include <cmath>
#include <sys/time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/types.h>
...
@@ -16,307 +16,257 @@
...
@@ -16,307 +16,257 @@
#include "utils.h"
#include "utils.h"
using
namespace
std
;
using
namespace
std
;
using
namespace
boost
::
math
;
using
namespace
boost
::
math
;
using
boost
::
math
::
cdf
;
using
boost
::
math
::
chi_squared
;
using
boost
::
math
::
chi_squared
;
using
boost
::
math
::
quantile
;
using
boost
::
math
::
complement
;
using
boost
::
math
::
complement
;
using
boost
::
math
::
cdf
;
using
boost
::
math
::
quantile
;
// Test whether a file can be opened for reading.
// file : path of the file to probe.
// return value : true if the file could be opened ; false if not
bool IsFileReadable(std::string file)
{
    // Opening the stream is the probe itself; the stream object goes out of
    // scope when the function returns.
    std::ifstream probe(file.c_str());
    return !probe.fail();
}
// display a time lenth in µs
// display a time lenth in µs
// return value : void
// return value : void
void
ExecMeasure
(
struct
timeval
begin
,
struct
timeval
end
,
string
operation
)
void
ExecMeasure
(
struct
timeval
begin
,
struct
timeval
end
,
string
operation
)
{
{
cerr
<<
"Execution time for operation : "
<<
operation
<<
" : "
cerr
<<
"Execution time for operation : "
<<
operation
<<
" : "
<<
end
.
tv_usec
-
begin
.
tv_usec
<<
" µs"
<<
endl
;
<<
end
.
tv_usec
-
begin
.
tv_usec
<<
" µs"
<<
endl
;
}
}
// Convert the leading integer found in a string to an int.
// incomingStr : text to parse; leading whitespace is skipped by the stream.
// return value : the parsed integer, or 0 when nothing could be read.
int string_to_int(std::string incomingStr)
{
    std::istringstream isstmp(incomingStr);
    // Initialised so that an empty input (where the extraction never writes
    // to the target) yields 0 instead of an indeterminate value.
    int i = 0;
    isstmp >> i;
    return i;
}
// Convert a double to its textual form.
// incoming : value to format.
// return value : the text produced by streaming the value with the default
//                stream formatting.
std::string double_to_string(double incoming)
{
    std::ostringstream oss;
    oss << incoming;
    return oss.str();
}
// Convert an int to its decimal textual form.
// incoming : value to format.
// return value : the text produced by streaming the value.
std::string int_to_string(int incoming)
{
    std::ostringstream oss;
    oss << incoming;
    return oss.str();
}
// Replace every occurrence of a pattern in a string.
// incoming    : text to process (taken by value, the caller's copy is untouched).
// pattern     : substring to look for.
// replacement : text substituted for each occurrence.
// return value : the text after substitution.
//
// Occurrences are replaced in a single left-to-right pass and freshly inserted
// text is never rescanned. This guarantees termination even when the
// replacement contains the pattern (e.g. "-" -> "--"), a case in which
// repeatedly searching the whole string would never finish.
// An empty pattern matches nothing and the text is returned unchanged.
std::string pyReplace(std::string incoming, std::string pattern, std::string replacement)
{
    if (pattern.empty())
    {
        return incoming;
    }

    std::string::size_type pos = 0;
    while ((pos = incoming.find(pattern, pos)) != std::string::npos)
    {
        incoming.replace(pos, pattern.length(), replacement);
        // Resume after the inserted text so it is not matched again.
        pos += replacement.length();
    }
    return incoming;
}
// Convert a single character to a one-character string.
// incoming : character to wrap.
// return value : a string of length 1 holding the character.
std::string char_to_string(char incoming)
{
    // Built directly rather than through a stream round trip: formatted
    // extraction skips whitespace, which would turn ' ' or '\n' into an
    // empty string.
    return std::string(1, incoming);
}
// Split a string on a single-character separator.
// inc : text to split.
// sep : separator; only its first character is used (the null character is
//       used when sep is empty).
// return value : the fields in order of appearance. Empty fields between two
//                consecutive separators are kept; an empty input yields an
//                empty vector.
std::vector<std::string> parseOnSep(std::string inc, std::string sep)
{
    // Worked out once, outside the loop, instead of on every iteration.
    const char delimiter = sep.empty() ? '\0' : sep[0];

    std::vector<std::string> ret;
    std::istringstream issInc(inc);
    std::string mot;
    while (std::getline(issInc, mot, delimiter))
    {
        ret.push_back(mot);
    }
    return ret;
}
// Return the first character of a string.
// inc : source text.
// return value : the first character of inc, or the null character when inc
//                is empty.
char string_to_char(std::string inc)
{
    // Reads the character directly; no temporary buffer (and no
    // variable-length array, which is not standard C++) is needed.
    return inc.empty() ? '\0' : inc[0];
}
// Remove every newline character from a string.
// inc : text to clean (taken by value, the caller's copy is untouched).
// return value : the text without any '\n'.
// Side effect : traces the input, each removal position and the result on
//               stderr.
std::string strip(std::string inc)
{
    std::cerr << "Passing into strip << " << inc;

    std::string::size_type pos = 0;
    while ((pos = inc.find("\n", pos)) != std::string::npos)
    {
        std::cerr << " ; pos = " << pos;
        // Erase exactly the one matched character; erasing two would also
        // swallow the character that follows the newline.
        inc.erase(pos, 1);
    }

    std::cerr << " to " << inc << std::endl;
    return inc;
}
double
chisquare
(
vector
<
double
>
toTest
,
vector
<
double
>
all
)
double
chisquare
(
vector
<
double
>
toTest
,
vector
<
double
>
all
)
{
{
boost
::
math
::
chi_squared
chi
(
1
);
boost
::
math
::
chi_squared
chi
(
1
)
;
double
a1
=
toTest
[
0
]
;
double
a1
=
toTest
[
0
]
;
double
a2
=
toTest
[
1
]
;
double
a2
=
toTest
[
1
];
double
b1
=
all
[
0
];
double
b1
=
all
[
0
];
double
b2
=
all
[
1
];
double
b2
=
all
[
1
];
;
;
double
s
=
a1
+
a2
+
b1
+
b2
;
double
K
=
s
*
(
a1
*
b2
-
a2
*
b1
)
*
(
a1
*
b2
-
a2
*
b1
)
/
(
a1
+
a2
)
/
(
b1
+
b2
)
/
(
a1
+
b1
)
/
(
a2
+
b2
);
double
P
=
boost
::
math
::
cdf
(
chi
,
K
);
double
s
=
a1
+
a2
+
b1
+
b2
;
return
P
;
double
K
=
s
*
(
a1
*
b2
-
a2
*
b1
)
*
(
a1
*
b2
-
a2
*
b1
)
/
(
a1
+
a2
)
/
(
b1
+
b2
)
/
(
a1
+
b1
)
/
(
a2
+
b2
);
double
P
=
boost
::
math
::
cdf
(
chi
,
K
);
return
P
;
}
}
double
fisher_test
(
vector
<
double
>
toTest
,
vector
<
double
>
control
)
{
double
fisher_test
(
vector
<
double
>
toTest
,
vector
<
double
>
control
)
double
a
=
toTest
[
0
];
{
double
b
=
toTest
[
1
];
double
a
=
toTest
[
0
];
double
c
=
control
[
0
];
double
b
=
toTest
[
1
];
double
d
=
control
[
1
];
double
c
=
control
[
0
];
double
d
=
control
[
1
];
double
N
=
a
+
b
+
c
+
d
;
double
r
=
a
+
c
;
double
N
=
a
+
b
+
c
+
d
;
double
n
=
c
+
d
;
double
r
=
a
+
c
;
double
max_for_k
=
min
(
r
,
n
);
double
n
=
c
+
d
;
double
min_for_k
=
(
double
)
max
(
0
,
int
(
r
+
n
-
N
));
double
max_for_k
=
min
(
r
,
n
);
hypergeometric_distribution
<>
hgd
(
r
,
n
,
N
);
double
min_for_k
=
(
double
)
max
(
0
,
int
(
r
+
n
-
N
));
double
cutoff
=
pdf
(
hgd
,
c
);
hypergeometric_distribution
<>
hgd
(
r
,
n
,
N
);
double
tmp_p
=
0.0
;
double
cutoff
=
pdf
(
hgd
,
c
);
for
(
int
k
=
min_for_k
;
k
<
max_for_k
+
1
;
k
++
)
{
double
tmp_p
=
0.0
;
double
p
=
pdf
(
hgd
,
k
);
for
(
int
k
=
min_for_k
;
k
<
max_for_k
+
1
;
k
++
)
if
(
p
<=
cutoff
)
{
{
tmp_p
+=
p
;
double
p
=
pdf
(
hgd
,
k
);
}
if
(
p
<=
cutoff
)
}
{
return
tmp_p
;
tmp_p
+=
p
;
}
}
return
tmp_p
;
}
}
// Normalise a nucleotide character.
// incoming : character to check.
// return value : 'A', 'C', 'G', 'T' or 'N' in upper case when incoming is one
//                of those letters in either case; 'N' for any other character.
char checkBase(char incoming)
{
    switch (incoming)
    {
        case 'a':
        case 'A':
            return 'A';
        case 'c':
        case 'C':
            return 'C';
        case 'g':
        case 'G':
            return 'G';
        case 't':
        case 'T':
            return 'T';
        default:
            // Covers 'n', 'N' and every unrecognised character.
            return 'N';
    }
}
// Method for calculating a sd from a vector of double
// incVector : values to summarise.
// return value : the standard deviation computed as
//                sqrt(mean of squares - square of mean), i.e. dividing by the
//                number of values; 0 for an empty vector.
double sd_calculator(std::vector<double> incVector)
{
    // Without values the divisions below would be 0/0.
    if (incVector.empty())
    {
        return 0;
    }

    // Accumulate the sum and the sum of squares
    double sumone = 0;
    double sumtwo = 0;
    int number = 0;
    for (std::vector<double>::iterator myIter = incVector.begin(); myIter != incVector.end(); ++myIter)
    {
        const double temp_value = *myIter;
        sumone += temp_value;
        sumtwo += (temp_value * temp_value);
        number++;
    }

    // Mean
    const double moyenne = sumone / number;

    // Variance
    double variance = (sumtwo / number) - (moyenne * moyenne);
    // Rounding can push the difference slightly below zero for (nearly)
    // constant data; clamp so the square root stays defined.
    if (variance < 0)
    {
        variance = 0;
    }

    // Standard deviation
    return std::sqrt(variance);
}
// Method for calculating a mean from a vector of double
// incVector : values to average.
// return value : the arithmetic mean of the values; 0 for an empty vector.
double moyenne_calculator(std::vector<double> incVector)
{
    // The accumulator must start at zero; adding into an uninitialised
    // variable gives an indeterminate result.
    double sumone = 0;
    int number = 0;

    // Accumulate the sum and count the values
    for (std::vector<double>::iterator myIter = incVector.begin(); myIter != incVector.end(); ++myIter)
    {
        sumone += *myIter;
        number++;
    }

    // Mean (an empty vector has none, 0 is returned instead)
    if (number == 0)
    {
        return 0;
    }
    return sumone / number;
}
// Method for calculating fisher exact test 2-sided, return the pvalue.
// Method for calculating fisher exact test 2-sided, return the pvalue.
double
FET
(
int
a
,
int
b
,
int
c
,
int
d
)
double
FET
(
int
a
,
int
b
,
int
c
,
int
d
)
{
{
int
n
=
a
+
b
+
c
+
d
;
int
n
=
a
+
b
+
c
+
d
;
double
logpCutOff
=
logHypergeometricProb
(
a
,
b
,
c
,
d
);
double
logpCutOff
=
logHypergeometricProb
(
a
,
b
,
c
,
d
);
double
pFraction
=
0
;
double
pFraction
=
0
;
double
logpValue
=
0
;
double
logpValue
=
0
;
for
(
int
x
=
0
;
x
<=
n
;
x
++
)
{
for
(
int
x
=
0
;
x
<=
n
;
x
++
)
if
((
a
+
b
-
x
>=
0
)
&&
(
a
+
c
-
x
>=
0
)
&&
(
d
-
a
+
x
>=
0
))
{
{
double
l
=
logHypergeometricProb
(
x
,
a
+
b
-
x
,
a
+
c
-
x
,
d
-
a
+
x
);
if
(
(
a
+
b
-
x
>=
0
)
&&
(
a
+
c
-
x
>=
0
)
&&
(
d
-
a
+
x
>=
0
)
)
if
(
l
<=
logpCutOff
)
{
{
pFraction
+=
exp
(
l
-
logpCutOff
);
double
l
=
logHypergeometricProb
(
x
,
a
+
b
-
x
,
a
+
c
-
x
,
d
-
a
+
x
);
}
if
(
l
<=
logpCutOff
)
}
{
}
pFraction
+=
exp
(
l
-
logpCutOff
);
logpValue
=
logpCutOff
+
log
(
pFraction
);
}
}
return
exp
(
logpValue
);
}
logpValue
=
logpCutOff
+
log
(
pFraction
);
return
exp
(
logpValue
);
}
}
// method for calculating the hypergeometrical log value for the FET.
// method for calculating the hypergeometrical log value for the FET.
double
logHypergeometricProb
(
int
a
,
int
b
,
int
c
,
int
d
)
double
logHypergeometricProb
(
int
a
,
int
b
,
int
c
,
int
d
)
{
{
return
logFactoriel
(
a
+
b
)
+
logFactoriel
(
c
+
d
)
+
logFactoriel
(
a
+
c
)
+
return
logFactoriel
(
a
+
b
)
+
logFactoriel
(
c
+
d
)
+
logFactoriel
(
a
+
c
)
+
logFactoriel
(
b
+
d
)
-
logFactoriel
(
a
)
-
logFactoriel
(
b
)
-
logFactoriel
(
c
)
-
logFactoriel
(
d
)
-
logFactoriel
(
a
+
b
+
c
+
d
);
logFactoriel
(
b
+
d
)
-
logFactoriel
(
a
)
-
logFactoriel
(
b
)
-
logFactoriel
(
c
)
-
logFactoriel
(
d
)
-
logFactoriel
(
a
+
b
+
c
+
d
);
}
}
// Method for calculating a log factoriel
// inc : value whose factorial logarithm is wanted.
// return value : log(inc!) computed as log(inc) + log(inc - 1) + ... + log(1);
//                0 when inc is zero or negative.
double logFactoriel(int inc)
{
    double ret = 0;
    // Summing logarithms avoids ever forming the factorial itself.
    while (inc > 0)
    {
        ret += std::log(static_cast<double>(inc));
        inc--;
    }
    return ret;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment