I'm trying to pull some text from an external website using this script.
It works perfectly, but it gets the entire page. I want to take only the content inside a specific div with the class 'content'. The entire page is put inside the variable 'data', and then this function is created to strip some tags:
function filterData(data){
data = data.replace(/<?\/body[^>]*>/g,'');
data = data.replace(/[\r|\n]+/g,'');
data = data.replace(/<--[\S\s]*?-->/g,'');
data = data.replace(/<noscript[^>]*>[\S\s]*?<\/noscript>/g,'');
data = data.replace(/<script[^>]*>[\S\s]*?<\/script>/g,'');
data = data.replace(/<script.*\/>/,'');
return data;
}
How would I go about finding the div with the class 'content' and only viewing the content inside that?
UPDATE: Sorry about using RegExes — can you help me to get the content without using RegEx? So, this is my HTML file:
<a href=".asp?a=9" class="ajaxtrigger">erg</a>
<div id="target" style="width:200px;height:500px;"></div>
<div id="code" style="width:200px;height:200px;"></div>
<script src=".min.js"></script>
<script>
$(document).ready(function(){
var container = $('#target');
$('.ajaxtrigger').click(function(){
doAjax($(this).attr('href'));
return false;
});
function doAjax(url){
if(url.match('^http')){
$.getJSON("?"+
"q=select%20*%20from%20html%20where%20url%3D%22"+
encodeURIComponent(url)+
"%22&format=xml'&callback=?",
function(data){
if(data.results[0]){
var tree = string2dom(data.results[0]);
container.html($("div.content", tree.doc));tree.destroy();
} else {
var errormsg = '<p>Error: could not load the page.</p>';
container.html(errormsg);
}
}
);
} else {
$('#target').load(url);
}
}
function filterData(data){
return tree;
}
});
</script>
I'm trying to pull some text from an external website using this script.
It works perfectly, but it gets the entire page. I want to take only the content inside a specific div with the class 'content'. The entire page is put inside the variable 'data', and then this function is created to strip some tags:
function filterData(data){
data = data.replace(/<?\/body[^>]*>/g,'');
data = data.replace(/[\r|\n]+/g,'');
data = data.replace(/<--[\S\s]*?-->/g,'');
data = data.replace(/<noscript[^>]*>[\S\s]*?<\/noscript>/g,'');
data = data.replace(/<script[^>]*>[\S\s]*?<\/script>/g,'');
data = data.replace(/<script.*\/>/,'');
return data;
}
How would I go about finding the div with the class 'content' and only viewing the content inside that?
UPDATE: Sorry about using RegExes — can you help me to get the content without using RegEx? So, this is my HTML file:
<a href="http://www.eurest.dk/kantiner/228/all.asp?a=9" class="ajaxtrigger">erg</a>
<div id="target" style="width:200px;height:500px;"></div>
<div id="code" style="width:200px;height:200px;"></div>
<script src="http://code.jquery./jquery.min.js"></script>
<script>
$(document).ready(function(){
var container = $('#target');
$('.ajaxtrigger').click(function(){
doAjax($(this).attr('href'));
return false;
});
function doAjax(url){
if(url.match('^http')){
$.getJSON("http://query.yahooapis./v1/public/yql?"+
"q=select%20*%20from%20html%20where%20url%3D%22"+
encodeURIComponent(url)+
"%22&format=xml'&callback=?",
function(data){
if(data.results[0]){
var tree = string2dom(data.results[0]);
container.html($("div.content", tree.doc));tree.destroy();
} else {
var errormsg = '<p>Error: could not load the page.</p>';
container.html(errormsg);
}
}
);
} else {
$('#target').load(url);
}
}
function filterData(data){
return tree;
}
});
</script>
Share
Improve this question
edited Oct 7, 2011 at 11:27
hoegenhaug
asked Oct 7, 2011 at 9:20
hoegenhaughoegenhaug
251 gold badge1 silver badge8 bronze badges
3
- 1 Why are you using regex to parse HTML? Especially in with JavaScript the browser this is entirely unnecessary, you can use the DOM. – Tomalak Commented Oct 7, 2011 at 9:36
- 1 Meanwhile, over on Planet Sane: stackoverflow./questions/1732348/… – johnsyweb Commented Oct 7, 2011 at 9:53
- 1 Yeah, okay, it seems like I shouldn't be using RegEx to do this. Thing is, my JavaScript skills are very limited, and the code I found used RegEx, so that's why I'm using them. But I'll try using the DOM. Thanks! – hoegenhaug Commented Oct 7, 2011 at 10:10
4 Answers
Reset to default 3Try something like this:
var matches = data.match(/<div class="content">([^<]*)<\/div>/);
if (matches)
return matches[1]; // div content
try this:
<div\b[^>]*class="content"[^>]*>([\s\S]*?)<\/div>
Here try this :
<div[^>]*?class='content'[^>]*?>(.*?)</div>
Captured reference /1 will have your content. Although you shouldn't be doing this with regexes :)
this may help you:
var divtxt = match(/<div[^>]*class="content"[^>]>.*<\/div>/);
but it may stop at the wrong .
you should use jquery or prototype to make it a dom-object and use selectors to find the right div. using jquery you would do something like:
var divtxt = $(data).find(".content").first().html();
remember to load the jquery library first.
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1744224426a4563935.html
评论列表(0条)