<!-- Current File : //proc/self/root/kunden/usr/share/doc/libthai-devel/html/thbrk_8h.html | -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.9.1"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>libthai: thai/thbrk.h File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">libthai
 <span id="projectnumber">0.1.28</span>
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.9.1 -->
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt GPL-v2 */
$(function() {
initMenu('',false,false,'search.php','Search');
});
/* @license-end */</script>
<div id="main-nav"></div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="dir_27634488b513a0cc2afc6ccdc5595e06.html">thai</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#func-members">Functions</a> </div>
<div class="headertitle">
<div class="title">thbrk.h File Reference</div> </div>
</div><!--header-->
<div class="contents">
<p>Thai word segmentation.
<a href="#details">More...</a></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:a5025d40315abede7c1bc6fb792ac95db"><td class="memItemLeft" align="right" valign="top">ThBrk * </td><td class="memItemRight" valign="bottom"><a class="el" href="thbrk_8h.html#a5025d40315abede7c1bc6fb792ac95db">th_brk_new</a> (const char *dictpath)</td></tr>
<tr class="memdesc:a5025d40315abede7c1bc6fb792ac95db"><td class="mdescLeft"> </td><td class="mdescRight">Create a dictionary-based word breaker. <a href="thbrk_8h.html#a5025d40315abede7c1bc6fb792ac95db">More...</a><br /></td></tr>
<tr class="separator:a5025d40315abede7c1bc6fb792ac95db"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a7d0a9fcab72812466336a06bf40e8fde"><td class="memItemLeft" align="right" valign="top">void </td><td class="memItemRight" valign="bottom"><a class="el" href="thbrk_8h.html#a7d0a9fcab72812466336a06bf40e8fde">th_brk_delete</a> (ThBrk *brk)</td></tr>
<tr class="memdesc:a7d0a9fcab72812466336a06bf40e8fde"><td class="mdescLeft"> </td><td class="mdescRight">Delete a word breaker. <a href="thbrk_8h.html#a7d0a9fcab72812466336a06bf40e8fde">More...</a><br /></td></tr>
<tr class="separator:a7d0a9fcab72812466336a06bf40e8fde"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ac4fd65b680140d72d91f9c249381a3e7"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="thbrk_8h.html#ac4fd65b680140d72d91f9c249381a3e7">th_brk_find_breaks</a> (ThBrk *brk, const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> *s, int pos[], size_t pos_sz)</td></tr>
<tr class="memdesc:ac4fd65b680140d72d91f9c249381a3e7"><td class="mdescLeft"> </td><td class="mdescRight">Find word break positions in Thai string. <a href="thbrk_8h.html#ac4fd65b680140d72d91f9c249381a3e7">More...</a><br /></td></tr>
<tr class="separator:ac4fd65b680140d72d91f9c249381a3e7"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a4caba8399ab4c894534ab3949f340c9b"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="thbrk_8h.html#a4caba8399ab4c894534ab3949f340c9b">th_brk_insert_breaks</a> (ThBrk *brk, const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> *in, <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> *out, size_t out_sz, const char *delim)</td></tr>
<tr class="memdesc:a4caba8399ab4c894534ab3949f340c9b"><td class="mdescLeft"> </td><td class="mdescRight">Insert word delimiters in given string. <a href="thbrk_8h.html#a4caba8399ab4c894534ab3949f340c9b">More...</a><br /></td></tr>
<tr class="separator:a4caba8399ab4c894534ab3949f340c9b"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:af2968cc4572afd30f17f24fec3338b13"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="thbrk_8h.html#af2968cc4572afd30f17f24fec3338b13">th_brk</a> (const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> *s, int pos[], size_t pos_sz)</td></tr>
<tr class="memdesc:af2968cc4572afd30f17f24fec3338b13"><td class="mdescLeft"> </td><td class="mdescRight">Find word break positions in Thai string. <a href="thbrk_8h.html#af2968cc4572afd30f17f24fec3338b13">More...</a><br /></td></tr>
<tr class="separator:af2968cc4572afd30f17f24fec3338b13"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ad6609cb290e2218489bc8d53c209bc10"><td class="memItemLeft" align="right" valign="top">int </td><td class="memItemRight" valign="bottom"><a class="el" href="thbrk_8h.html#ad6609cb290e2218489bc8d53c209bc10">th_brk_line</a> (const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> *in, <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> *out, size_t out_sz, const char *delim)</td></tr>
<tr class="memdesc:ad6609cb290e2218489bc8d53c209bc10"><td class="mdescLeft"> </td><td class="mdescRight">Insert word delimiters in given string. <a href="thbrk_8h.html#ad6609cb290e2218489bc8d53c209bc10">More...</a><br /></td></tr>
<tr class="separator:ad6609cb290e2218489bc8d53c209bc10"><td class="memSeparator" colspan="2"> </td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>Thai word segmentation. </p>
</div><h2 class="groupheader">Function Documentation</h2>
<a id="af2968cc4572afd30f17f24fec3338b13"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af2968cc4572afd30f17f24fec3338b13">◆ </a></span>th_brk()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">int th_brk </td>
<td>(</td>
<td class="paramtype">const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> * </td>
<td class="paramname"><em>s</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int </td>
<td class="paramname"><em>pos</em>[], </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">size_t </td>
<td class="paramname"><em>pos_sz</em> </td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Find word break positions in Thai string. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">s</td><td>: the input string to be processed </td></tr>
<tr><td class="paramname">pos</td><td>: array to keep breaking positions </td></tr>
<tr><td class="paramname">pos_sz</td><td>: size of <em>pos</em>[]</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the actual number of breaking positions found</dd></dl>
<p>Finds word break positions in Thai string <em>s</em> and stores at most <em>pos_sz</em> breaking positions in <em>pos</em>[], from left to right. Uses the shared word breaker.</p>
<p>(This function is deprecated since version 0.1.25, in favor of <a class="el" href="thbrk_8h.html#ac4fd65b680140d72d91f9c249381a3e7" title="Find word break positions in Thai string.">th_brk_find_breaks()</a>, which is more thread-safe.) </p>
</div>
</div>
<a id="a7d0a9fcab72812466336a06bf40e8fde"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a7d0a9fcab72812466336a06bf40e8fde">◆ </a></span>th_brk_delete()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">void th_brk_delete </td>
<td>(</td>
<td class="paramtype">ThBrk * </td>
<td class="paramname"><em>brk</em></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Delete a word breaker. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">brk</td><td>: the word breaker</td></tr>
</table>
</dd>
</dl>
<p>Frees memory associated with the word breaker.</p>
<p>(Available since version 0.1.25, libthai.so.0.3.0) </p>
</div>
</div>
<a id="ac4fd65b680140d72d91f9c249381a3e7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac4fd65b680140d72d91f9c249381a3e7">◆ </a></span>th_brk_find_breaks()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">int th_brk_find_breaks </td>
<td>(</td>
<td class="paramtype">ThBrk * </td>
<td class="paramname"><em>brk</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> * </td>
<td class="paramname"><em>s</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int </td>
<td class="paramname"><em>pos</em>[], </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">size_t </td>
<td class="paramname"><em>pos_sz</em> </td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Find word break positions in Thai string. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">brk</td><td>: the word breaker </td></tr>
<tr><td class="paramname">s</td><td>: the input string to be processed </td></tr>
<tr><td class="paramname">pos</td><td>: array to keep breaking positions </td></tr>
<tr><td class="paramname">pos_sz</td><td>: size of <em>pos</em>[]</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the actual number of breaking positions found</dd></dl>
<p>Finds word break positions in Thai string <em>s</em> and stores at most <em>pos_sz</em> breaking positions in <em>pos</em>[], from left to right.</p>
<p>(Available since version 0.1.25, libthai.so.0.3.0) </p>
</div>
</div>
<a id="a4caba8399ab4c894534ab3949f340c9b"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a4caba8399ab4c894534ab3949f340c9b">◆ </a></span>th_brk_insert_breaks()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">int th_brk_insert_breaks </td>
<td>(</td>
<td class="paramtype">ThBrk * </td>
<td class="paramname"><em>brk</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> * </td>
<td class="paramname"><em>in</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype"><a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> * </td>
<td class="paramname"><em>out</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">size_t </td>
<td class="paramname"><em>out_sz</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const char * </td>
<td class="paramname"><em>delim</em> </td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Insert word delimiters in given string. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">brk</td><td>: the word breaker </td></tr>
<tr><td class="paramname">in</td><td>: the input string to be processed </td></tr>
<tr><td class="paramname">out</td><td>: the output buffer </td></tr>
<tr><td class="paramname">out_sz</td><td>: the size of <em>out</em> </td></tr>
<tr><td class="paramname">delim</td><td>: the word delimiter to insert</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the actual size of the processed string</dd></dl>
<p>Analyzes the input string and stores the result in the output buffer with the given word delimiter inserted at every word boundary.</p>
<p>(Available since version 0.1.25, libthai.so.0.3.0) </p>
</div>
</div>
<a id="ad6609cb290e2218489bc8d53c209bc10"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ad6609cb290e2218489bc8d53c209bc10">◆ </a></span>th_brk_line()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">int th_brk_line </td>
<td>(</td>
<td class="paramtype">const <a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> * </td>
<td class="paramname"><em>in</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype"><a class="el" href="thailib_8h.html#a75138f51cef42c2e0d8bc3e6ce691f51">thchar_t</a> * </td>
<td class="paramname"><em>out</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">size_t </td>
<td class="paramname"><em>out_sz</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const char * </td>
<td class="paramname"><em>delim</em> </td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Insert word delimiters in given string. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">in</td><td>: the input string to be processed </td></tr>
<tr><td class="paramname">out</td><td>: the output buffer </td></tr>
<tr><td class="paramname">out_sz</td><td>: the size of <em>out</em> </td></tr>
<tr><td class="paramname">delim</td><td>: the word delimiter to insert</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the actual size of the processed string</dd></dl>
<p>Analyzes the input string and stores the result in the output buffer with the given word delimiter inserted at every word boundary. Uses the shared word breaker.</p>
<p>(This function is deprecated since version 0.1.25, in favor of <a class="el" href="thbrk_8h.html#a4caba8399ab4c894534ab3949f340c9b" title="Insert word delimiters in given string.">th_brk_insert_breaks()</a>, which is more thread-safe.) </p>
</div>
</div>
<a id="a5025d40315abede7c1bc6fb792ac95db"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a5025d40315abede7c1bc6fb792ac95db">◆ </a></span>th_brk_new()</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">ThBrk* th_brk_new </td>
<td>(</td>
<td class="paramtype">const char * </td>
<td class="paramname"><em>dictpath</em></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Create a dictionary-based word breaker. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">dictpath</td><td>: the dictionary path, or NULL for default</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>the created instance, or NULL on failure</dd></dl>
<p>Loads the dictionary from the given file and returns the created word breaker. If <em>dictpath</em> is NULL, first searches in the directory given by the LIBTHAI_DICTDIR environment variable, then in the library installation directory. Returns NULL if the dictionary file is not found or cannot be loaded.</p>
<p>The returned ThBrk object should be destroyed after use using <a class="el" href="thbrk_8h.html#a7d0a9fcab72812466336a06bf40e8fde" title="Delete a word breaker.">th_brk_delete()</a>.</p>
<p>In multi-thread environments, <a class="el" href="thbrk_8h.html#a5025d40315abede7c1bc6fb792ac95db" title="Create a dictionary-based word breaker.">th_brk_new()</a> and <a class="el" href="thbrk_8h.html#a7d0a9fcab72812466336a06bf40e8fde" title="Delete a word breaker.">th_brk_delete()</a> should be used to create and destroy a word breaker instance inside critical sections (i.e. with mutex). And the word breaker methods can then be safely called in parallel during its lifetime.</p>
<p>(Available since version 0.1.25, libthai.so.0.3.0) </p>
</div>
</div>
</div><!-- contents -->
<hr size="1"/><address style="text-align: right;"><small>
Generated for libthai by <a href="http://www.doxygen.org/index.html"><img src="doxygen.png" alt="doxygen" align="middle" border="0"/></a>
1.9.1</small></address>
</body>
</html>