c:[["$","$Ld",null,{"page":"page"}],["$","$Le",null,{}],["$","div",null,{"className":"container shadow-4 rounded mt-3 mb-3 p-3 border border-primary","children":[[["$","$Lf","0",{"href":"../python","className":"badge badge-primary m-1","children":"python"}],["$","$Lf","1",{"href":"../python-3.x","className":"badge badge-primary m-1","children":"python-3.x"}],["$","$Lf","2",{"href":"../scikit-learn","className":"badge badge-primary m-1","children":"scikit-learn"}],["$","$Lf","3",{"href":"../countvectorizer","className":"badge badge-primary m-1","children":"countvectorizer"}]],["$","h1",null,{"className":"h1","dangerouslySetInnerHTML":{"__html":"Should CountVectorizer be fit on both the train and test sets?"}}],["$","hr",null,{}],["$","div",null,{"dangerouslySetInnerHTML":{"__html":"

I have come across various articles online: some suggest that CountVectorizer should be fit on both the train and test sets, while others suggest that it should be fit only on the train set.\nWhich approach is generally better for text classification?

\n"}}],["$","hr",null,{}],["$","div",null,{"className":"h3","children":["Solution ",["$","li",null,{"className":"h3 fa fa-arrow-down"}]]}],["$","hr",null,{}],["$","div",null,{"dangerouslySetInnerHTML":{"__html":"

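Generally, the test set should be kept unobserved, so the CountVectorizer should be fitted only on the train set. Fitting it on the test set as well would leak information about the test data (its vocabulary) into the model and make the evaluation overly optimistic. Call fit_transform on the train set, then only transform on the test set.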

\n"}}],["$","br",null,{}],["$","ul",null,{"className":"list-group","children":[]}],["$","br",null,{}],["$","$L10",null,{}],["$","br",null,{}]]}],["$","$L11",null,{}],["$","$L12",null,{}],["$","$L13",null,{}],["$","div",null,{"className":"container ftr1","children":[["$","hr",null,{}],["$","footer",null,{"className":"bg-body-tertiary text-center text-lg-start","children":["$","div",null,{"className":"text-center p-3","children":["Content Source :",["$","a",null,{"className":"text-body","href":"https://stackoverflow.com","rel":"nofollow noreferrer noopener","id":"ftlk","style":{"color":"black"},"children":"Stackoverflow"}]," | ",["$","$Lf",null,{"href":"/privacy-policy","style":{"color":"blue !important"},"children":"Privacy Policy"}]," | ",["$","$Lf",null,{"href":"/terms-and-condition","style":{"color":"blue !important"},"children":"Terms and Condition"}]," | ",["$","$Lf",null,{"href":"/contact-us","style":{"color":"blue !important"},"children":"Contact Us"}]]}]}]]}],["$","$L14",null,{}],["$","$L15",null,{}]]