Skip to content

Commit

Permalink
Added meta example features-char-string
Browse files Browse the repository at this point in the history
  • Loading branch information
avramidis committed Feb 14, 2019
1 parent 143b15e commit c0b5dc8
Show file tree
Hide file tree
Showing 25 changed files with 131 additions and 55 deletions.
2 changes: 1 addition & 1 deletion doc/readme/INTERFACES.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ This needs `shogun.oct` to be visible, which is either in `path/to/build/src/int

Running an example:

python path/to/octave_example.py
octave path/to/octave_example.m

### Ruby
This needs `shogun.rb` to be visible, which is either in `path/to/build/src/interfaces/ruby_modular/` or in something similar to `path/to/shogun-install/lib/x86_64-linux-gnu/site_ruby`
Expand Down
3 changes: 2 additions & 1 deletion examples/meta/generator/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def parse(self, programString, filePath=None):
'ShortRealMatrix': 'SHOGUNSGTYPE',
'RealMatrix': 'SHOGUNSGTYPE',
'LongRealMatrix': 'SHOGUNSGTYPE',
'ComplexMatrix': 'SHOGUNSGTYPE'
'ComplexMatrix': 'SHOGUNSGTYPE',
'StringCharList':'SHOGUNSGTYPE'
}

t_INTLITERAL = "-?[0-9]+"
Expand Down
9 changes: 7 additions & 2 deletions examples/meta/generator/targets/cpp.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
"Type": {
"RealFeatures": "DenseFeatures<float64_t>",
"RealSubsetFeatures": "DenseSubsetFeatures<float64_t>",
"StringCharFeatures": "CStringFeatures<char>",
"StringCharFeatures": "StringFeatures<char>",
"StringCharList": "SGStringList<char>",
"Default": "$typeName",
"bool": "bool",
"string": "char*",
Expand Down Expand Up @@ -96,7 +97,11 @@
"get_int_vector": "$object->get<SGVector<int32_t>>($arguments)",
"get_real": "$object->get<float64_t>($arguments)",
"get_real_vector": "$object->get<SGVector<float64_t>>($arguments)",
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)"
"get_real_matrix": "$object->get<SGMatrix<float64_t>>($arguments)",
"get_int_string_list": "$object->get<SGStringList<int32_t>>($arguments)",
"get_bool_string_list": "$object->get<SGStringList<bool>>($arguments)",
"get_char_string_list": "$object->get<SGStringList<char>>($arguments)",
"get_real_string_list": "$object->get<SGStringList<float64_t>>($arguments)"
},
"StaticCall": "C$typeName::$method($arguments)",
"GlobalCall": "$method($arguments)",
Expand Down
3 changes: 2 additions & 1 deletion examples/meta/generator/targets/csharp.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
"ShortRealMatrix": "float[,]",
"RealMatrix": "double[,]",
"LongRealMatrix": "double[,]",
"ComplexMatrix": "Complex[,]"
"ComplexMatrix": "Complex[,]",
"StringCharList": "String[]"
},
"Expr": {
"StringLiteral": "\"$literal\"",
Expand Down
6 changes: 4 additions & 2 deletions examples/meta/generator/targets/java.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IncludeEnums": true,
"IncludeGlobalFunctions": false,
"DependencyListElement": "import org.shogun.$typeName;",
"DependencyListSeparator": "\n"
"DependencyListSeparator": "\n",
"ExcludeImport": ["StringCharList"]
},
"Statement": "$statement;\n",
"Comment": "//$comment\n",
Expand Down Expand Up @@ -68,7 +69,8 @@
"ShortRealMatrix": "FloatMatrix",
"RealMatrix": "DoubleMatrix",
"LongRealMatrix": "DoubleMatrix",
"ComplexMatrix": "DoubleMatrix"
"ComplexMatrix": "DoubleMatrix",
"StringCharList": "String[]"
},
"Expr": {
"StringLiteral": "\"$literal\"",
Expand Down
2 changes: 1 addition & 1 deletion examples/meta/generator/targets/octave.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"Program": "shogun\n\n$program",
"Program": "shogun;\n\n$program",
"Statement": "$statement;\n",
"Comment": "%$comment\n",
"Init": {
Expand Down
6 changes: 5 additions & 1 deletion examples/meta/generator/targets/python.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@
"get_int_vector": "$object.get($arguments)",
"get_real": "$object.get($arguments)",
"get_real_vector": "$object.get($arguments)",
"get_real_matrix": "$object.get($arguments)"
"get_real_matrix": "$object.get($arguments)",
"get_bool_string_list": "$object.get($arguments)",
"get_char_string_list": "$object.get($arguments)",
"get_real_string_list": "$object.get($arguments)",
"get_int_string_list": "$object.get($arguments)"
},
"StaticCall": "$typeName.$method($arguments)",
"GlobalCall": "$method($arguments$kwargs)",
Expand Down
4 changes: 4 additions & 0 deletions examples/meta/generator/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,10 @@ def dependenciesString(self, allClasses, interfacedClasses, enums,
if self.targetDict["Dependencies"].get("IncludeGlobalFunctions"):
dependencies = dependencies.union(globalFunctions)

if "ExcludeImport" in self.targetDict["Dependencies"]:
for item in self.targetDict["Dependencies"].get("ExcludeImport"):
dependencies.discard(item)

dependencies = list(dependencies)

translations = list(set(map(self.translateDependencyElement, dependencies)))
Expand Down
11 changes: 11 additions & 0 deletions examples/meta/src/features/string_char.sg
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Meta example: build char string features from a text file and read back
# some of their properties through the generic getters.
File words = csv_file("../../data/words.dat")

# Create string features from the file using the raw-byte alphabet.
#![create_features]
Features f = string_features(words, enum EAlphabet.RAWBYTE)
#![create_features]

# Query the length of the longest string, the number of strings and the
# strings themselves by parameter name.
#![output stat]
int max_string_length = f.get_int("max_string_length")
int number_of_strings = f.get_int("num_vectors")
StringCharList string_list = f.get_char_string_list("string_list")
#![output stat]
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_char.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def features_string_char (strings):
f.set_feature_vector(array(['t','e','s','t']), 0)

#print("strings", f.get_features())
return f.get_features(), f
return f.get_string_list(), f

if __name__=='__main__':
print('StringCharFeatures')
Expand Down
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def features_string_file (directory, fname):
#or load fasta file
#f.load_fasta('fasta.fa')
#print(f.get_features())
return f.get_features(), f
return f.get_string_list(), f

if __name__=='__main__':
print('StringWordFeatures')
Expand Down
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_ulong.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def features_string_ulong (start=0,order=2,gap=0,rev=False):
uf.set_feature_vector(array([1,2,3,4,5], dtype=uint64), 0)


return uf.get_features(),uf.get_feature_vector(2), uf.get_num_vectors()
return uf.get_string_list(),uf.get_feature_vector(2), uf.get_num_vectors()

if __name__=='__main__':
print('simple_longint')
Expand Down
2 changes: 1 addition & 1 deletion examples/undocumented/python/features_string_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def features_string_word (strings, start, order, gap, rev):
wf.set_feature_vector(array([1,2,3,4,5], dtype=uint16), 0)

#print("strings", wf.get_features())
return wf.get_features(), wf
return wf.get_string_list(), wf

if __name__=='__main__':
print('StringWordFeatures')
Expand Down
2 changes: 1 addition & 1 deletion src/interfaces/java/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ TYPEMAP_STRINGFEATURES(int64_t, int, Int, jint, "Int[][]", "[[I")
TYPEMAP_STRINGFEATURES(uint64_t, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(long long, long, Long, jlong, "Long[][]", "[[J")
TYPEMAP_STRINGFEATURES(float32_t, float, Float, jfloat, "Float[][]", "[[F")
TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "Doulbe[][]", "[[D")
TYPEMAP_STRINGFEATURES(float64_t, double, Double, jdouble, "Double[][]", "[[D")

#undef TYPEMAP_STRINGFEATURES

Expand Down
47 changes: 38 additions & 9 deletions src/interfaces/octave/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -368,21 +368,50 @@ TYPEMAP_STRINGFEATURES_IN(is_matrix_type() && arg.is_uint16_type, uint16NDArray,

/* output typemap for CStringFeatures */
%define TYPEMAP_STRINGFEATURES_OUT(type,typecode)
%typemap(out) shogun::SGStringList<char>
{
/* TODO STRING OUT TYPEMAPS */

This comment has been minimized.

Copy link
@karlnapf

karlnapf Feb 16, 2019

Member

Still todo?

shogun::SGString<char>* str = $1.strings;
int32_t i, num_strings = $1.num_strings;

Cell c(num_strings, 1);

for (i = 0; i < num_strings; i++) {
c(i)=std::string(str[i].string);
SG_FREE(str[i].string);
}
SG_FREE(str);

$result = c;
}

%typemap(out) shogun::SGStringList<type>
{
/* TODO STRING OUT TYPEMAPS */
shogun::SGString<type>* str = $1.strings;
int32_t i, num_strings = $1.num_strings;

ColumnVector c(dim_vector(num_strings, 1));

for (i = 0; i < num_strings; i++) {
c(i)=*str[i].string;
SG_FREE(str[i].string);
}
SG_FREE(str);

$result = c;
}
%enddef

TYPEMAP_STRINGFEATURES_OUT(char, charMatrix)
TYPEMAP_STRINGFEATURES_OUT(uint8_t, uint8NDArray)
TYPEMAP_STRINGFEATURES_OUT(int16_t, int16NDArray)
TYPEMAP_STRINGFEATURES_OUT(uint16_t, uint16NDArray)
TYPEMAP_STRINGFEATURES_OUT(int32_t, int32NDArray)
TYPEMAP_STRINGFEATURES_OUT(uint32_t, uint32NDArray)
TYPEMAP_STRINGFEATURES_OUT(int64_t, int64NDArray)
TYPEMAP_STRINGFEATURES_OUT(uint64_t, uint64NDArray)
TYPEMAP_STRINGFEATURES_OUT(float64_t, Matrix)
TYPEMAP_STRINGFEATURES_OUT(char, Cell)
TYPEMAP_STRINGFEATURES_OUT(uint8_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int16_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint16_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int32_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint32_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(int64_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(uint64_t, ColumnVector)
TYPEMAP_STRINGFEATURES_OUT(float64_t, ColumnVector)

#undef TYPEMAP_STRINGFEATURES_OUT

Expand Down
5 changes: 4 additions & 1 deletion src/interfaces/python/swig_typemaps.i
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,10 @@ _GETTERS = ["get",
"get_int",
"get_real_matrix",
"get_real_vector",
"get_int_vector"
"get_int_vector",
"get_bool_string_list",
"get_char_string_list",
"get_int_string_list"
]

_FACTORIES = ["distance",
Expand Down
3 changes: 3 additions & 0 deletions src/interfaces/swig/shogun.i
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ namespace shogun
%template(get_real) CSGObject::get<float64_t, void>;
%template(get_int) CSGObject::get<int32_t, void>;
%template(get_real_matrix) CSGObject::get<SGMatrix<float64_t>, void>;
%template(get_bool_string_list) CSGObject::get<SGStringList<bool>, void>;
%template(get_char_string_list) CSGObject::get<SGStringList<char>, void>;
%template(get_int_string_list) CSGObject::get<SGStringList<int32_t>, void>;

#ifndef SWIGJAVA
%template(get_real_vector) CSGObject::get<SGVector<float64_t>, void>;
Expand Down
8 changes: 5 additions & 3 deletions src/shogun/features/StringFeatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ template<class ST> EFeatureClass CStringFeatures<ST>::get_feature_class() const

template<class ST> EFeatureType CStringFeatures<ST>::get_feature_type() const { return F_UNKNOWN; }

template<class ST> CAlphabet* CStringFeatures<ST>::get_alphabet()
template<class ST> CAlphabet* CStringFeatures<ST>::get_alphabet() const
{
SG_REF(alphabet);
return alphabet;
Expand Down Expand Up @@ -983,15 +983,15 @@ template<class ST> bool CStringFeatures<ST>::append_features(SGString<ST>* p_fea
return false;
}

template<class ST> SGStringList<ST> CStringFeatures<ST>::get_features()
template<class ST> SGStringList<ST> CStringFeatures<ST>::get_string_list() const
{
SGStringList<ST> sl(NULL,0,0,false);

sl.strings=get_features(sl.num_strings, sl.max_string_length);
return sl;
}

template<class ST> SGString<ST>* CStringFeatures<ST>::get_features(int32_t& num_str, int32_t& max_str_len)
template<class ST> SGString<ST>* CStringFeatures<ST>::get_features(int32_t& num_str, int32_t& max_str_len) const
{
if (m_subset_stack->has_subsets())
SG_ERROR("get features() is not possible on subset")
Expand Down Expand Up @@ -1695,6 +1695,8 @@ template<class ST> void CStringFeatures<ST>::init()

m_parameters->add_vector(&symbol_mask_table, &symbol_mask_table_len, "mask_table", "Symbol mask table - using in higher order mapping");
watch_param("mask_table", &symbol_mask_table, &symbol_mask_table_len);
watch_method("num_vectors", &CStringFeatures::get_num_vectors);
watch_method("string_list", &CStringFeatures::get_string_list);
}

/** get feature type the char feature can deal with
Expand Down
10 changes: 5 additions & 5 deletions src/shogun/features/StringFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ template <class ST> class CStringFeatures : public CFeatures
*
* @return alphabet
*/
CAlphabet* get_alphabet();
CAlphabet* get_alphabet() const;

/** duplicate feature object
*
Expand Down Expand Up @@ -419,10 +419,10 @@ template <class ST> class CStringFeatures : public CFeatures
bool append_features(SGString<ST>* p_features, int32_t p_num_vectors,
int32_t p_max_string_length);

/** get_features
* @return features
/** get_string_list
* @return string_list
*/
SGStringList<ST> get_features();
SGStringList<ST> get_string_list() const;

/** get_features
*
Expand All @@ -432,7 +432,7 @@ template <class ST> class CStringFeatures : public CFeatures
* @param max_str_len maximal string length (returned)
* @return string features
*/
virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len);
virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len) const;

/** copy_features
*
Expand Down
18 changes: 18 additions & 0 deletions src/shogun/lib/SGStringList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,24 @@ SGStringList<T> SGStringList<T>::clone() const
return SGStringList<T>(strings, num_strings, max_string_length);
}

/** Compare this string list with another one.
 *
 * The list metadata (string count and maximum string length) is checked
 * first; the individual strings are only compared when both values match.
 *
 * @param other string list to compare against
 * @return true if the metadata and every string are equal, false otherwise
 */
template <class T>
bool SGStringList<T>::equals(const SGStringList<T>& other) const
{
	// Cheap metadata checks, evaluated in the same order as before:
	// string count first, then maximum string length.
	const bool same_shape = num_strings == other.num_strings &&
	                        max_string_length == other.max_string_length;
	if (!same_shape)
		return false;

	// Stop at the first pair of strings that differ.
	for (auto idx : range(num_strings))
	{
		const bool same_item = strings[idx].equals(other.strings[idx]);
		if (!same_item)
			return false;
	}

	return true;
}

template class SGStringList<bool>;
template class SGStringList<char>;
template class SGStringList<int8_t>;
Expand Down
8 changes: 8 additions & 0 deletions src/shogun/lib/SGStringList.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ template <class T> class SGStringList : public SGReferencedData
*/
SGStringList<T> clone() const;


/** Equals method
*
* Compares the number of strings and the maximum string length first, and
* then each string item in order.
*
* @param other SGStringList to compare with
* @return false iff the number of strings, the maximum string length or
* any of the string items are different, true otherwise
*/
bool equals(const SGStringList<T>& other) const;

protected:

/** copy data */
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/preprocessor/StringPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ namespace shogun
string_features = new CStringFeatures<ST>(*string_features);
}

auto string_list = string_features->get_features();
auto string_list = string_features->get_string_list();

apply_to_string_list(string_list);

Expand Down
6 changes: 3 additions & 3 deletions src/shogun/util/factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,16 +106,16 @@ namespace shogun
}

CFeatures* string_features(
CFile* file, EAlphabet alpha = DNA,
EPrimitiveType primitive_type = PT_CHAR)
CFile* file, machine_int_t alphabet_type = DNA,
machine_int_t primitive_type = PT_CHAR)
{
REQUIRE(file, "No file provided.\n");
CFeatures* result = nullptr;

switch (primitive_type)
{
case PT_CHAR:
result = new CStringFeatures<char>(file, alpha);
result = new CStringFeatures<char>(file, static_cast<EAlphabet>(alphabet_type));
break;
default:
SG_SNOTIMPLEMENTED
Expand Down
Loading

0 comments on commit c0b5dc8

Please sign in to comment.