lexxwern
asked on
Math/Statistics: Weighted Least Squares Linear Regression in Php
So, I have years, data points (values), and weights (for the data values).
How do I implement a method or class in PHP for performing Weighted Least Squares Linear Regression?
Any math wizards out there willing to help? Pseudocode will do; I can PHP-ize it.
Thanks,
How do I implement a method or class in Php for achieving Weighted Least Squares Linear Regression.
Any math wizards out there willing to help? Pseudo code will do, I can Php'ize it.
Thanks,
I am not familiar with pseudocode, but if you can do the least squares part, I can suggest an inelegant way to weight your values.
ASKER
Hi aburr,
Yes, I have code for that. I'll be using the Least Square Fit class. Let me post the code:
<?php
/*Aims
generate equation for straight line
picture of plot
*/
/**
 * Straight-line (y = m*x + c) least-squares fit with an optional PNG plot.
 *
 * Usage: assign $input (and optionally $weights), call line() to fit,
 * then residuals() and/or plot().
 *
 * When $weights is set the fit minimises sum(w_i * (y_i - m*x_i - c)^2)
 * (weighted least squares); when it is left unset every point has weight 1
 * and the result is the ordinary least-squares line.
 */
class ymxc{
    var $input; //array of rows array(x, y); line()/residuals() append columns 2..5
    var $weights; //optional array of weights, positionally parallel to $input
    var $gradient; //fitted slope m, set by line()
    var $constant; //fitted intercept c, set by line()
    var $n; //number of data rows, set by line()

    /**
     * Fits the line and stores the result in $gradient / $constant / $n.
     *
     * Side effect: each row of $input gains [2] = x*y and [3] = x^2
     * (always unweighted).
     *
     * Needs at least two distinct x values with non-zero total weight;
     * otherwise the divisions below divide by zero.
     *
     * @return array array(gradient, constant)
     */
    function line(){
        $sumw=0;
        $sumx=0;
        $sumy=0;
        $sumxy=0;
        $sumxx=0;
        $i=0;
        foreach($this->input as $in){
            // A missing weight counts as 1, which reproduces the unweighted fit.
            $w=isset($this->weights[$i])?$this->weights[$i]:1;
            $this->input[$i][2]=($in[0]*$in[1]);
            $this->input[$i][3]=pow($in[0],2);
            //weighted sums
            $sumw=$sumw+$w;
            $sumx=$sumx+$w*$in[0];
            $sumy=$sumy+$w*$in[1];
            $sumxy=$sumxy+$w*$this->input[$i][2];
            $sumxx=$sumxx+$w*$this->input[$i][3];
            $i++;
        }//4each
        // Weighted means of x and y (plain means when all weights are 1).
        $xdivN=$sumx/$sumw;
        $ydivN=$sumy/$sumw;
        $gradient=($sumxy-$sumw*$xdivN*$ydivN)/($sumxx-$sumw*pow($xdivN,2));
        $constant=$ydivN-$gradient*$xdivN;
        $this->gradient=$gradient;
        $this->constant=$constant;
        $this->n=count($this->input);
        return array($gradient,$constant);
        //http://en.wikipedia.org/wiki/Correlation
        /* OU MS284 unit 7 page 41
        ISBN= 7492 2066 2 */
    }//line

    /**
     * Evaluates the fitted line at every x and summarises the residuals.
     *
     * Side effect: each row of $input gains [4] = model value m*x + c and
     * [5] = model value minus observed y. Requires line() to have run.
     *
     * @return array array(mean residual, standard deviation of residuals)
     */
    function residuals(){
        $i=0;
        foreach($this->input as $ii){
            $this->input[$i][4]=$this->gradient*$ii[0]+$this->constant;
            $this->input[$i][5]=$this->input[$i][4]-$ii[1];
            $i++;
        }//foreach
        $res=array();
        foreach($this->input as $key => $in){
            $res[$key]=$in[5];
        }
        return $this->sdv($res);
    }//residuals

    /**
     * Mean and population standard deviation of a list of numbers.
     *
     * @param array $array numeric values; must not be empty
     * @return array array(mean, standard deviation)
     */
    function sdv($array){
        $sumx=0;
        $sumxx=0;
        $n=count($array);
        foreach($array as $in){
            $sumx=$sumx+$in;
            $sumxx=$sumxx+$in*$in;
        }//4each
        $xdivN=$sumx/$n;
        $xxdivN=$sumxx/$n;
        // E[x^2] - E[x]^2 can come out a hair below zero through round-off;
        // clamp it so the square root never yields NAN.
        $dsv=sqrt(max(0,$xxdivN-$xdivN*$xdivN));
        return array($xdivN,$dsv);
        //http://en.wikipedia.org/wiki/Standard_deviation
    }

    /**
     * Draws the data points, the fitted line and the x=0 / y=0 axes into a
     * 600x450 PNG and writes it to "<plotname>.png".
     *
     * The data area is the rectangle x 50..550, y 50..400 (500 x 350 pixels).
     * Text is rendered with the TrueType font file 'Font.TTF'.
     * Fits the line first if line() has not been called yet.
     *
     * @param string|null $title    heading, defaults to "Title"
     * @param string|null $xtitle   x-axis caption, defaults to "X Values"
     * @param string|null $ytitle   y-axis caption, defaults to "Y Values"
     * @param string|null $plotname output file name without ".png", defaults to "plot"
     */
    function plot($title=null,$xtitle=null,$ytitle=null,$plotname=null){
        $n=is_array($this->input)?count($this->input):0;
        if($n<1)return; // nothing to draw
        if($this->gradient===null||$this->constant===null)$this->line();
        $this->residuals();
        $plot = imagecreate(600, 450);
        // The first colour allocated on a palette image becomes its background.
        imagecolorallocate($plot, 239, 239, 239);
        imagefilledrectangle($plot, 50, 50, 550, 400, imagecolorallocate($plot, 255, 255, 255));//plot size 500 x 350
        $font = 'Font.TTF';
        $texcol = imagecolorallocate($plot, 0, 0, 0);
        $blue = imagecolorallocate($plot, 0, 0, 255);
        //separate into columns, find min and max
        $x=array();
        $y=array();
        foreach ($this->input as $key => $in) {
            $x[$key] = $in[0];
            $y[$key] = $in[1];
        }
        $xx=$x; // unsorted copies keep each point's x and y paired
        $yy=$y;
        sort($y, SORT_NUMERIC);
        sort($x, SORT_NUMERIC);
        $xrange=$x[count($x)-1]-$x[0];
        $yrange=$y[count($x)-1]-$y[0];
        // All-equal x or y would make every scaling below divide by zero.
        if($xrange==0)$xrange=1;
        if($yrange==0)$yrange=1;
        //plot points; GD takes integer pixel coordinates, so truncate explicitly
        for($i=0;$i<$n;$i++){
            // +45 rather than +50: presumably nudges the "x" glyph onto the point
            $xpoint=(int)((($xx[$i]-$x[0])/$xrange*500)+45);
            $ypoint=(int)(400-(($yy[$i]-$y[0])/$yrange*350));
            imagettftext($plot, 10, 0, $xpoint, $ypoint, $texcol, $font, "x");
        }
        if(!isset($xtitle))$xtitle="X Values";
        imagettftext($plot, 15, 0, 250, 440, $texcol, $font, $xtitle);
        if(!isset($ytitle))$ytitle="Y Values";
        imagettftext($plot, 15, 90, 20, 240, $texcol, $font, $ytitle);
        if(!isset($title))$title="Title";
        imagettftext($plot, 20, 0, 100, 30, $texcol, $font, $title);
        imagettftext($plot, 10, 0, 450, 40, $blue, $font, "Gradient =".round($this->gradient,4));
        imagettftext($plot, 10, 0, 450, 20, $blue, $font, "Constant =".round($this->constant,4));
        //six tick marks and labels on each axis
        for($i=0;$i<6;$i++){
            imagettftext($plot, 5, 90, 50, 70*$i+50, $texcol, $font, "|");
            imagettftext($plot, 10, 90, 40, 450-(70*$i+50), $texcol, $font, round($i*$yrange/5+$y[0]));
            imagettftext($plot, 5, 0, 50+500/5*$i, 403, $texcol, $font, "|");
            imagettftext($plot, 10, 0, 50+500/5*$i, 420, $texcol, $font, round($i*$xrange/5+$x[0]));
        }
        //plot line: model value at the smallest and largest x, mapped to pixels
        $s=$x[0]*$this->gradient+$this->constant;
        $s=(int)(400-(($s-$y[0])/$yrange*350));
        $t=$x[$n-1]*$this->gradient+$this->constant;
        $t=(int)(400-($t-$y[0])/$yrange*350);
        imagettftext($plot, 10, 0, 15, $s, $texcol, $font, round($this->constant,2).",0");
        imageline($plot, 50, $s, 550, $t, imagecolorallocate($plot, 255, 0, 0));
        // horizontal axis at y = 0
        $yzero=(int)(400-(-$y[0]/$yrange*350));
        imageline($plot, 50, $yzero, 550, $yzero, $blue);
        // vertical axis at x = 0 (the original read an undefined variable here,
        // which evaluated to 0; the zero is now explicit)
        $xo=(int)(((0-$x[0])/$xrange*500)+50);
        imageline($plot, $xo, 400, $xo, 50, imagecolorallocate($plot, 0, 255, 0));
        if(!isset($plotname))$plotname="plot";
        imagepng($plot, $plotname.".png");
    }//plot
}//class
?>
Yes, I have code for that. I'll be using the Least Square Fit class. Let me post the code:
<?php
/*Aims
generate equation for straight line
picture of plot
*/
/**
 * Straight-line (y = m*x + c) least-squares fit with an optional PNG plot.
 *
 * Usage: assign $input (and optionally $weights), call line() to fit,
 * then residuals() and/or plot().
 *
 * When $weights is set the fit minimises sum(w_i * (y_i - m*x_i - c)^2)
 * (weighted least squares); when it is left unset every point has weight 1
 * and the result is the ordinary least-squares line.
 */
class ymxc{
    var $input; //array of rows array(x, y); line()/residuals() append columns 2..5
    var $weights; //optional array of weights, positionally parallel to $input
    var $gradient; //fitted slope m, set by line()
    var $constant; //fitted intercept c, set by line()
    var $n; //number of data rows, set by line()

    /**
     * Fits the line and stores the result in $gradient / $constant / $n.
     *
     * Side effect: each row of $input gains [2] = x*y and [3] = x^2
     * (always unweighted).
     *
     * Needs at least two distinct x values with non-zero total weight;
     * otherwise the divisions below divide by zero.
     *
     * @return array array(gradient, constant)
     */
    function line(){
        $sumw=0;
        $sumx=0;
        $sumy=0;
        $sumxy=0;
        $sumxx=0;
        $i=0;
        foreach($this->input as $in){
            // A missing weight counts as 1, which reproduces the unweighted fit.
            $w=isset($this->weights[$i])?$this->weights[$i]:1;
            $this->input[$i][2]=($in[0]*$in[1]);
            $this->input[$i][3]=pow($in[0],2);
            //weighted sums
            $sumw=$sumw+$w;
            $sumx=$sumx+$w*$in[0];
            $sumy=$sumy+$w*$in[1];
            $sumxy=$sumxy+$w*$this->input[$i][2];
            $sumxx=$sumxx+$w*$this->input[$i][3];
            $i++;
        }//4each
        // Weighted means of x and y (plain means when all weights are 1).
        $xdivN=$sumx/$sumw;
        $ydivN=$sumy/$sumw;
        $gradient=($sumxy-$sumw*$xdivN*$ydivN)/($sumxx-$sumw*pow($xdivN,2));
        $constant=$ydivN-$gradient*$xdivN;
        $this->gradient=$gradient;
        $this->constant=$constant;
        $this->n=count($this->input);
        return array($gradient,$constant);
        //http://en.wikipedia.org/wiki/Correlation
        /* OU MS284 unit 7 page 41
        ISBN= 7492 2066 2 */
    }//line

    /**
     * Evaluates the fitted line at every x and summarises the residuals.
     *
     * Side effect: each row of $input gains [4] = model value m*x + c and
     * [5] = model value minus observed y. Requires line() to have run.
     *
     * @return array array(mean residual, standard deviation of residuals)
     */
    function residuals(){
        $i=0;
        foreach($this->input as $ii){
            $this->input[$i][4]=$this->gradient*$ii[0]+$this->constant;
            $this->input[$i][5]=$this->input[$i][4]-$ii[1];
            $i++;
        }//foreach
        $res=array();
        foreach($this->input as $key => $in){
            $res[$key]=$in[5];
        }
        return $this->sdv($res);
    }//residuals

    /**
     * Mean and population standard deviation of a list of numbers.
     *
     * @param array $array numeric values; must not be empty
     * @return array array(mean, standard deviation)
     */
    function sdv($array){
        $sumx=0;
        $sumxx=0;
        $n=count($array);
        foreach($array as $in){
            $sumx=$sumx+$in;
            $sumxx=$sumxx+$in*$in;
        }//4each
        $xdivN=$sumx/$n;
        $xxdivN=$sumxx/$n;
        // E[x^2] - E[x]^2 can come out a hair below zero through round-off;
        // clamp it so the square root never yields NAN.
        $dsv=sqrt(max(0,$xxdivN-$xdivN*$xdivN));
        return array($xdivN,$dsv);
        //http://en.wikipedia.org/wiki/Standard_deviation
    }

    /**
     * Draws the data points, the fitted line and the x=0 / y=0 axes into a
     * 600x450 PNG and writes it to "<plotname>.png".
     *
     * The data area is the rectangle x 50..550, y 50..400 (500 x 350 pixels).
     * Text is rendered with the TrueType font file 'Font.TTF'.
     * Fits the line first if line() has not been called yet.
     *
     * @param string|null $title    heading, defaults to "Title"
     * @param string|null $xtitle   x-axis caption, defaults to "X Values"
     * @param string|null $ytitle   y-axis caption, defaults to "Y Values"
     * @param string|null $plotname output file name without ".png", defaults to "plot"
     */
    function plot($title=null,$xtitle=null,$ytitle=null,$plotname=null){
        $n=is_array($this->input)?count($this->input):0;
        if($n<1)return; // nothing to draw
        if($this->gradient===null||$this->constant===null)$this->line();
        $this->residuals();
        $plot = imagecreate(600, 450);
        // The first colour allocated on a palette image becomes its background.
        imagecolorallocate($plot, 239, 239, 239);
        imagefilledrectangle($plot, 50, 50, 550, 400, imagecolorallocate($plot, 255, 255, 255));//plot size 500 x 350
        $font = 'Font.TTF';
        $texcol = imagecolorallocate($plot, 0, 0, 0);
        $blue = imagecolorallocate($plot, 0, 0, 255);
        //separate into columns, find min and max
        $x=array();
        $y=array();
        foreach ($this->input as $key => $in) {
            $x[$key] = $in[0];
            $y[$key] = $in[1];
        }
        $xx=$x; // unsorted copies keep each point's x and y paired
        $yy=$y;
        sort($y, SORT_NUMERIC);
        sort($x, SORT_NUMERIC);
        $xrange=$x[count($x)-1]-$x[0];
        $yrange=$y[count($x)-1]-$y[0];
        // All-equal x or y would make every scaling below divide by zero.
        if($xrange==0)$xrange=1;
        if($yrange==0)$yrange=1;
        //plot points; GD takes integer pixel coordinates, so truncate explicitly
        for($i=0;$i<$n;$i++){
            // +45 rather than +50: presumably nudges the "x" glyph onto the point
            $xpoint=(int)((($xx[$i]-$x[0])/$xrange*500)+45);
            $ypoint=(int)(400-(($yy[$i]-$y[0])/$yrange*350));
            imagettftext($plot, 10, 0, $xpoint, $ypoint, $texcol, $font, "x");
        }
        if(!isset($xtitle))$xtitle="X Values";
        imagettftext($plot, 15, 0, 250, 440, $texcol, $font, $xtitle);
        if(!isset($ytitle))$ytitle="Y Values";
        imagettftext($plot, 15, 90, 20, 240, $texcol, $font, $ytitle);
        if(!isset($title))$title="Title";
        imagettftext($plot, 20, 0, 100, 30, $texcol, $font, $title);
        imagettftext($plot, 10, 0, 450, 40, $blue, $font, "Gradient =".round($this->gradient,4));
        imagettftext($plot, 10, 0, 450, 20, $blue, $font, "Constant =".round($this->constant,4));
        //six tick marks and labels on each axis
        for($i=0;$i<6;$i++){
            imagettftext($plot, 5, 90, 50, 70*$i+50, $texcol, $font, "|");
            imagettftext($plot, 10, 90, 40, 450-(70*$i+50), $texcol, $font, round($i*$yrange/5+$y[0]));
            imagettftext($plot, 5, 0, 50+500/5*$i, 403, $texcol, $font, "|");
            imagettftext($plot, 10, 0, 50+500/5*$i, 420, $texcol, $font, round($i*$xrange/5+$x[0]));
        }
        //plot line: model value at the smallest and largest x, mapped to pixels
        $s=$x[0]*$this->gradient+$this->constant;
        $s=(int)(400-(($s-$y[0])/$yrange*350));
        $t=$x[$n-1]*$this->gradient+$this->constant;
        $t=(int)(400-($t-$y[0])/$yrange*350);
        imagettftext($plot, 10, 0, 15, $s, $texcol, $font, round($this->constant,2).",0");
        imageline($plot, 50, $s, 550, $t, imagecolorallocate($plot, 255, 0, 0));
        // horizontal axis at y = 0
        $yzero=(int)(400-(-$y[0]/$yrange*350));
        imageline($plot, 50, $yzero, 550, $yzero, $blue);
        // vertical axis at x = 0 (the original read an undefined variable here,
        // which evaluated to 0; the zero is now explicit)
        $xo=(int)(((0-$x[0])/$xrange*500)+50);
        imageline($plot, $xo, 400, $xo, 50, imagecolorallocate($plot, 0, 255, 0));
        if(!isset($plotname))$plotname="plot";
        imagepng($plot, $plotname.".png");
    }//plot
}//class
?>
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
So that is effectively the same as multiplying the value of the dependent variable by its weight. Is that all that separates Linear Regression from Weighted Linear Regression?
essentially, yes
http://en.wikipedia.org/wiki/Weighted_least_squares
http://en.wikipedia.org/wiki/Weighted_least_squares
I submitted an answer, the correctness of which was confirmed