Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
vihgo
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gad-public
vihgo
Commits
d066f640
Commit
d066f640
authored
4 years ago
by
Yannis Duffourd
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Code reformating for clang complience
parent
37344ab2
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
79 additions
and
129 deletions
+79
-129
utils.cpp
cpp/utils.cpp
+79
-129
No files found.
cpp/utils.cpp
View file @
d066f640
// utility functions for bioinformatics
#include <iostream>
#include <string>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <cstring>
#include <algorithm>
#include <cmath>
#include <string>
#include <sys/time.h>
#include <sys/types.h>
...
...
@@ -16,37 +16,33 @@
#include "utils.h"
using
namespace
std
;
using
namespace
boost
::
math
;
using
boost
::
math
::
cdf
;
using
boost
::
math
::
chi_squared
;
using
boost
::
math
::
quantile
;
using
boost
::
math
::
complement
;
using
boost
::
math
::
cdf
;
using
boost
::
math
::
quantile
;
// test if a file is readable
// return value : true if readable ; false if not
bool
IsFileReadable
(
string
file
)
{
ifstream
fichier
(
file
.
c_str
()
);
bool
IsFileReadable
(
string
file
)
{
ifstream
fichier
(
file
.
c_str
());
return
!
fichier
.
fail
();
}
// display a time lenth in µs
// return value : void
void
ExecMeasure
(
struct
timeval
begin
,
struct
timeval
end
,
string
operation
)
{
cerr
<<
"Execution time for operation : "
<<
operation
<<
" : "
<<
end
.
tv_usec
-
begin
.
tv_usec
<<
" µs"
<<
endl
;
void
ExecMeasure
(
struct
timeval
begin
,
struct
timeval
end
,
string
operation
)
{
cerr
<<
"Execution time for operation : "
<<
operation
<<
" : "
<<
end
.
tv_usec
-
begin
.
tv_usec
<<
" µs"
<<
endl
;
}
int
string_to_int
(
string
incomingStr
)
{
istringstream
isstmp
(
incomingStr
);
int
string_to_int
(
string
incomingStr
)
{
istringstream
isstmp
(
incomingStr
);
int
i
;
isstmp
>>
i
;
return
i
;
}
string
double_to_string
(
double
incoming
)
{
string
double_to_string
(
double
incoming
)
{
string
result
;
ostringstream
oss
;
oss
<<
incoming
;
...
...
@@ -54,9 +50,7 @@ string double_to_string( double incoming )
return
result
;
}
string
int_to_string
(
int
incoming
)
{
string
int_to_string
(
int
incoming
)
{
string
result
;
ostringstream
oss
;
oss
<<
incoming
;
...
...
@@ -64,20 +58,16 @@ string int_to_string( int incoming )
return
result
;
}
string
pyReplace
(
string
incoming
,
string
pattern
,
string
replacement
)
{
while
(
incoming
.
rfind
(
pattern
)
!=
string
::
npos
)
{
int
n
=
incoming
.
rfind
(
pattern
);
string
pyReplace
(
string
incoming
,
string
pattern
,
string
replacement
)
{
while
(
incoming
.
rfind
(
pattern
)
!=
string
::
npos
)
{
int
n
=
incoming
.
rfind
(
pattern
);
int
l
=
pattern
.
length
();
incoming
.
replace
(
n
,
l
,
replacement
);
incoming
.
replace
(
n
,
l
,
replacement
);
}
return
incoming
;
}
string
char_to_string
(
char
incoming
)
{
string
char_to_string
(
char
incoming
)
{
string
s
;
stringstream
ss
;
ss
<<
incoming
;
...
...
@@ -85,66 +75,54 @@ string char_to_string(char incoming)
return
s
;
}
vector
<
string
>
parseOnSep
(
string
inc
,
string
sep
)
{
vector
<
string
>
parseOnSep
(
string
inc
,
string
sep
)
{
// cerr << "Entering ParseOnSep function" << endl;
// cerr << "\tIncoming string : " << inc << " ; separator : " << sep << endl;
vector
<
string
>
ret
;
istringstream
issInc
(
inc
);
istringstream
issInc
(
inc
);
string
mot
;
while
(
getline
(
issInc
,
mot
,
string_to_char
(
sep
)
)
)
{
ret
.
push_back
(
mot
);
while
(
getline
(
issInc
,
mot
,
string_to_char
(
sep
)))
{
ret
.
push_back
(
mot
);
}
return
ret
;
}
char
string_to_char
(
string
inc
)
{
char
string_to_char
(
string
inc
)
{
char
cstr
[
inc
.
size
()
+
1
];
inc
.
copy
(
cstr
,
inc
.
size
()
+
1
);
cstr
[
inc
.
size
()]
=
'\0'
;
return
*
cstr
;
}
string
strip
(
string
inc
)
{
cerr
<<
"Passing into strip << "
<<
inc
;
string
strip
(
string
inc
)
{
cerr
<<
"Passing into strip << "
<<
inc
;
string
::
size_type
pos
=
0
;
while
(
(
pos
=
inc
.
find
(
"
\n
"
,
pos
)
)
!=
string
::
npos
)
{
cerr
<<
" ; pos = "
<<
pos
;
inc
.
erase
(
pos
,
2
);
while
((
pos
=
inc
.
find
(
"
\n
"
,
pos
))
!=
string
::
npos
)
{
cerr
<<
" ; pos = "
<<
pos
;
inc
.
erase
(
pos
,
2
);
}
cerr
<<
" to "
<<
inc
<<
endl
;
return
inc
;
}
double
chisquare
(
vector
<
double
>
toTest
,
vector
<
double
>
all
)
{
double
chisquare
(
vector
<
double
>
toTest
,
vector
<
double
>
all
)
{
boost
::
math
::
chi_squared
chi
(
1
);
double
a1
=
toTest
[
0
]
;
double
a1
=
toTest
[
0
]
;
double
a2
=
toTest
[
1
];
double
b1
=
all
[
0
];
double
b2
=
all
[
1
];
;
double
b2
=
all
[
1
]
;
;
double
s
=
a1
+
a2
+
b1
+
b2
;
double
K
=
s
*
(
a1
*
b2
-
a2
*
b1
)
*
(
a1
*
b2
-
a2
*
b1
)
/
(
a1
+
a2
)
/
(
b1
+
b2
)
/
(
a1
+
b1
)
/
(
a2
+
b2
);
double
K
=
s
*
(
a1
*
b2
-
a2
*
b1
)
*
(
a1
*
b2
-
a2
*
b1
)
/
(
a1
+
a2
)
/
(
b1
+
b2
)
/
(
a1
+
b1
)
/
(
a2
+
b2
);
double
P
=
boost
::
math
::
cdf
(
chi
,
K
);
return
P
;
}
double
fisher_test
(
vector
<
double
>
toTest
,
vector
<
double
>
control
)
{
double
fisher_test
(
vector
<
double
>
toTest
,
vector
<
double
>
control
)
{
double
a
=
toTest
[
0
];
double
b
=
toTest
[
1
];
double
c
=
control
[
0
];
...
...
@@ -158,68 +136,51 @@ double fisher_test(vector<double> toTest, vector<double> control )
hypergeometric_distribution
<>
hgd
(
r
,
n
,
N
);
double
cutoff
=
pdf
(
hgd
,
c
);
double
tmp_p
=
0.0
;
for
(
int
k
=
min_for_k
;
k
<
max_for_k
+
1
;
k
++
)
{
for
(
int
k
=
min_for_k
;
k
<
max_for_k
+
1
;
k
++
)
{
double
p
=
pdf
(
hgd
,
k
);
if
(
p
<=
cutoff
)
{
if
(
p
<=
cutoff
)
{
tmp_p
+=
p
;
}
}
return
tmp_p
;
}
char
checkBase
(
char
incoming
)
{
if
(
incoming
==
'c'
)
{
char
checkBase
(
char
incoming
)
{
if
(
incoming
==
'c'
)
{
return
'C'
;
}
if
(
incoming
==
't'
)
{
if
(
incoming
==
't'
)
{
return
'T'
;
}
if
(
incoming
==
'a'
)
{
if
(
incoming
==
'a'
)
{
return
'A'
;
}
if
(
incoming
==
'g'
)
{
if
(
incoming
==
'g'
)
{
return
'G'
;
}
if
(
incoming
==
'n'
)
{
if
(
incoming
==
'n'
)
{
return
'N'
;
}
if
(
incoming
==
'C'
)
{
if
(
incoming
==
'C'
)
{
return
'C'
;
}
if
(
incoming
==
'T'
)
{
if
(
incoming
==
'T'
)
{
return
'T'
;
}
if
(
incoming
==
'A'
)
{
if
(
incoming
==
'A'
)
{
return
'A'
;
}
if
(
incoming
==
'G'
)
{
if
(
incoming
==
'G'
)
{
return
'G'
;
}
if
(
incoming
==
'N'
)
{
if
(
incoming
==
'N'
)
{
return
'N'
;
}
return
'N'
;
}
//Method for calculating a sd from a vector of double
double
sd_calculator
(
vector
<
double
>
incVector
)
{
// Method for calculating a sd from a vector of double
double
sd_calculator
(
vector
<
double
>
incVector
)
{
// Déclarations
double
sd
;
double
temp_value
;
...
...
@@ -231,19 +192,18 @@ double sd_calculator( vector<double> incVector )
// calcul des moyennes et moyennes carrées
vector
<
double
>::
iterator
myIter
;
for
(
myIter
=
incVector
.
begin
()
;
myIter
!=
incVector
.
end
()
;
myIter
++
)
{
temp_value
=
*
myIter
;
for
(
myIter
=
incVector
.
begin
();
myIter
!=
incVector
.
end
();
myIter
++
)
{
temp_value
=
*
myIter
;
sumone
+=
temp_value
;
sumtwo
+=
(
temp_value
*
temp_value
);
number
++
;
sumtwo
+=
(
temp_value
*
temp_value
);
number
++
;
}
// calcul de la moyenne
moyenne
=
sumone
/
number
;
// Calcul de la variance
variance
=
(
sumtwo
/
number
)
-
(
moyenne
*
moyenne
);
variance
=
(
sumtwo
/
number
)
-
(
moyenne
*
moyenne
);
// Calcul ecart type
sd
=
sqrt
(
variance
);
...
...
@@ -251,8 +211,7 @@ double sd_calculator( vector<double> incVector )
}
// Method for calculating a mean from a vector of double
double
moyenne_calculator
(
vector
<
double
>
incVector
)
{
double
moyenne_calculator
(
vector
<
double
>
incVector
)
{
// Déclarations
double
temp_value
;
double
sumone
;
...
...
@@ -261,62 +220,53 @@ double moyenne_calculator( vector<double> incVector )
// calcul des moyennes et moyennes carrées
vector
<
double
>::
iterator
myIter
;
for
(
myIter
=
incVector
.
begin
()
;
myIter
!=
incVector
.
end
()
;
myIter
++
)
{
temp_value
=
*
myIter
;
for
(
myIter
=
incVector
.
begin
();
myIter
!=
incVector
.
end
();
myIter
++
)
{
temp_value
=
*
myIter
;
sumone
+=
temp_value
;
number
++
;
number
++
;
}
// calcul de la moyenne
if
(
number
!=
0
)
{
if
(
number
!=
0
)
{
moyenne
=
sumone
/
number
;
}
else
{
}
else
{
return
0
;
}
return
moyenne
;
}
// Method for calculating fisher exact test 2-sided, return the pvalue.
double
FET
(
int
a
,
int
b
,
int
c
,
int
d
)
{
double
FET
(
int
a
,
int
b
,
int
c
,
int
d
)
{
int
n
=
a
+
b
+
c
+
d
;
double
logpCutOff
=
logHypergeometricProb
(
a
,
b
,
c
,
d
);
double
logpCutOff
=
logHypergeometricProb
(
a
,
b
,
c
,
d
);
double
pFraction
=
0
;
double
logpValue
=
0
;
for
(
int
x
=
0
;
x
<=
n
;
x
++
)
{
if
(
(
a
+
b
-
x
>=
0
)
&&
(
a
+
c
-
x
>=
0
)
&&
(
d
-
a
+
x
>=
0
)
)
{
double
l
=
logHypergeometricProb
(
x
,
a
+
b
-
x
,
a
+
c
-
x
,
d
-
a
+
x
);
if
(
l
<=
logpCutOff
)
{
pFraction
+=
exp
(
l
-
logpCutOff
);
for
(
int
x
=
0
;
x
<=
n
;
x
++
)
{
if
((
a
+
b
-
x
>=
0
)
&&
(
a
+
c
-
x
>=
0
)
&&
(
d
-
a
+
x
>=
0
))
{
double
l
=
logHypergeometricProb
(
x
,
a
+
b
-
x
,
a
+
c
-
x
,
d
-
a
+
x
);
if
(
l
<=
logpCutOff
)
{
pFraction
+=
exp
(
l
-
logpCutOff
);
}
}
}
logpValue
=
logpCutOff
+
log
(
pFraction
);
logpValue
=
logpCutOff
+
log
(
pFraction
);
return
exp
(
logpValue
);
}
// method for calculating the hypergeometrical log value for the FET.
double
logHypergeometricProb
(
int
a
,
int
b
,
int
c
,
int
d
)
{
return
logFactoriel
(
a
+
b
)
+
logFactoriel
(
c
+
d
)
+
logFactoriel
(
a
+
c
)
+
logFactoriel
(
b
+
d
)
-
logFactoriel
(
a
)
-
logFactoriel
(
b
)
-
logFactoriel
(
c
)
-
logFactoriel
(
d
)
-
logFactoriel
(
a
+
b
+
c
+
d
);
double
logHypergeometricProb
(
int
a
,
int
b
,
int
c
,
int
d
)
{
return
logFactoriel
(
a
+
b
)
+
logFactoriel
(
c
+
d
)
+
logFactoriel
(
a
+
c
)
+
logFactoriel
(
b
+
d
)
-
logFactoriel
(
a
)
-
logFactoriel
(
b
)
-
logFactoriel
(
c
)
-
logFactoriel
(
d
)
-
logFactoriel
(
a
+
b
+
c
+
d
);
}
// Method for calculating a log factoriel
double
logFactoriel
(
int
inc
)
{
double
logFactoriel
(
int
inc
)
{
double
ret
;
for
(
ret
=
0
;
inc
>
0
;
inc
--
)
{
ret
+=
log
(
(
double
)
inc
);
for
(
ret
=
0
;
inc
>
0
;
inc
--
)
{
ret
+=
log
((
double
)
inc
);
}
return
ret
;
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment