<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>DEV Community: hfrnssc</title>
    <description>The latest articles on DEV Community by hfrnssc (@hfrnssc).</description>
    <link>https://dev.to/hfrnssc</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F510184%2F651fdc92-da7b-4151-9f05-9e366f05413e.png</url>
      <title>DEV Community: hfrnssc</title>
      <link>https://dev.to/hfrnssc</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://dev.to/feed/hfrnssc"/>
    <language>en</language>
    <item>
      <title>How can I solve the tokenization process with R?</title>
      <dc:creator>hfrnssc</dc:creator>
      <pubDate>Sun, 08 Nov 2020 04:20:23 +0000</pubDate>
      <link>https://dev.to/hfrnssc/how-can-i-solve-the-tokenization-process-with-r-48gb</link>
      <guid>https://dev.to/hfrnssc/how-can-i-solve-the-tokenization-process-with-r-48gb</guid>
      <description>&lt;div class="ltag__stackexchange--container"&gt;
  &lt;div class="ltag__stackexchange--title-container"&gt;
    
      &lt;div class="ltag__stackexchange--title"&gt;
        &lt;h1&gt;
          &lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--pTF_nE4a--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_880/https://practicaldev-herokuapp-com.freetls.fastly.net/assets/stackoverflow-logo-b42691ae545e4810b105ee957979a853a696085e67e43ee14c5699cf3e890fb4.svg" alt=""&gt;
            &lt;a href="https://stackoverflow.com/questions/64730524/how-can-i-solve-the-tokenization-process-with-r" rel="noopener noreferrer"&gt;
               How can I solve the tokenization process with R?
            &lt;/a&gt;
        &lt;/h1&gt;
        &lt;div class="ltag__stackexchange--post-metadata"&gt;
          &lt;span&gt;Nov  7 '20&lt;/span&gt;
            &lt;span&gt;Comments: 1&lt;/span&gt;
            &lt;span&gt;Answers: 0&lt;/span&gt;
        &lt;/div&gt;
      &lt;/div&gt;
      &lt;a class="ltag__stackexchange--score-container" href="https://stackoverflow.com/questions/64730524/how-can-i-solve-the-tokenization-process-with-r" rel="noopener noreferrer"&gt;
        &lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--5MiFESHx--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_880/https://practicaldev-herokuapp-com.freetls.fastly.net/assets/stackexchange-arrow-up-eff2e2849e67d156181d258e38802c0b57fa011f74164a7f97675ca3b6ab756b.svg" alt=""&gt;
        &lt;div class="ltag__stackexchange--score-number"&gt;
          0
        &lt;/div&gt;
        &lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--Rk_a5QFN--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_880/https://practicaldev-herokuapp-com.freetls.fastly.net/assets/stackexchange-arrow-down-4349fac0dd932d284fab7e4dd9846f19a3710558efde0d2dfd05897f3eeb9aba.svg" alt=""&gt;
      &lt;/a&gt;
    
  &lt;/div&gt;
  &lt;div class="ltag__stackexchange--body"&gt;
    
&lt;p&gt;I want to process the text
from this&lt;/p&gt;
&lt;p&gt;&lt;a href="https://i.stack.imgur.com/uAFlZ.png" rel="nofollow noreferrer"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--mvgVlmUD--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_880/https://i.stack.imgur.com/uAFlZ.png" alt="enter image description here"&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;to this with removal of regular expression, like emoticon symbol, numbers, punctuation, etc.&lt;/p&gt;
&lt;p&gt;&lt;a href="https://i.stack.imgur.com/BRk9c.png" rel="nofollow noreferrer"&gt;&lt;img src="https://res.cloudinary.com/practicaldev/image/fetch/s--PhFpwWkX--/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_880/https://i.stack.imgur.com/BRk9c.png" alt="enter image description here"&gt;&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;I have tried with this code&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;library(dplyr)
library(tidyr)
library(tidytext)
library(textdata)
library(purrr)
library(csv)
sentanalysis &amp;lt;- read.csv('crawling_commuterline.csv', header=TRUE, sep=";", encoding="UTF-8")
sentanalysis

tweetdt = sentanalysis %&amp;gt;% select(tw)
head(tweetdt)

tidytext::unnest_tokens(read.csv("crawling_commuterline.csv", stringsAsFactors = FALSE),word,tw)
&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;…&lt;/p&gt;
    
  &lt;/div&gt;
  &lt;div class="ltag__stackexchange--btn--container"&gt;
    
      &lt;a href="https://stackoverflow.com/questions/64730524/how-can-i-solve-the-tokenization-process-with-r" rel="noopener noreferrer"&gt;Open Full Question&lt;/a&gt;
    
  &lt;/div&gt;
&lt;/div&gt;


</description>
    </item>
  </channel>
</rss>
