Link to home
Start Free TrialLog in
Avatar of lexxwern
lexxwernFlag for Netherlands

asked on

Math/Statistics: Weighted Least Squares Linear Regression in Php

So, I have years, data points (values) and weights (for the data values).

How do I implement a method or class in PHP for achieving Weighted Least Squares Linear Regression?

Any math wizards out there willing to help? Pseudo code will do, I can Php'ize it.

Thanks,
Avatar of aburr
aburr
Flag of United States of America image

I am not familiar with pseudocode, but if you can do the least squares part I can suggest an inelegant way to weight your values.
Avatar of lexxwern

ASKER

Hi aburr,

Yes, I have code for that. I'll be using the Least Square Fit class. Let me post the code:

<?php
/*Aims
generate equation for straight line
picture of plot


*/
/**
 * Ordinary least-squares fit of a straight line y = gradient * x + constant,
 * with helpers for residual statistics and a PNG plot of the data and the line.
 *
 * Typical use: assign $input, call line(), then optionally residuals() or plot().
 *
 * Preconditions (not guarded): line() divides by the number of rows and by the
 * spread of the x values, so $input needs at least two rows with distinct x.
 * sdv() divides by the number of values it is given.
 *
 * References kept from the original source:
 *   http://en.wikipedia.org/wiki/Correlation
 *   http://en.wikipedia.org/wiki/Standard_deviation
 *   OU MS284 unit 7 page 41, ISBN 7492 2066 2
 */
class ymxc
{
    /**
     * Data rows, each array(x, y). The methods below append working columns:
     * line() adds [2] = x*y and [3] = x^2; residuals() adds [4] = model y and
     * [5] = model y - observed y.
     */
    public $input;

    /** Slope of the fitted line; null until line() has run. */
    public $gradient;

    /** Intercept of the fitted line; null until line() has run. */
    public $constant;

    /** Number of rows in $input when line() last ran. */
    public $n;

    /**
     * Fits the straight line and stores the result on the object.
     *
     * @return array array(gradient, constant)
     */
    public function line()
    {
        $sumx = 0;
        $sumy = 0;
        $sumxy = 0;
        $sumxx = 0;
        $n = count($this->input);

        // Iterate by key so the working columns land on the row they belong to
        // even when $input is not keyed 0..n-1.
        foreach ($this->input as $key => $in) {
            $this->input[$key][2] = $in[0] * $in[1];
            $this->input[$key][3] = pow($in[0], 2);

            $sumx = $sumx + $in[0];
            $sumy = $sumy + $in[1];
            $sumxy = $sumxy + $this->input[$key][2];
            $sumxx = $sumxx + $this->input[$key][3];
        }

        $xdivN = $sumx / $n;
        $ydivN = $sumy / $n;
        $gradient = ($sumxy - $n * $xdivN * $ydivN) / ($sumxx - $n * pow($xdivN, 2));
        $constant = $ydivN - $gradient * $xdivN;

        $this->gradient = $gradient;
        $this->constant = $constant;
        $this->n = $n;

        return array($gradient, $constant);
    }

    /**
     * Computes the model value and the residual (model y - observed y) for
     * every row, storing them in columns [4] and [5] of $input.
     *
     * Runs line() first when no fit has been stored yet, so the residuals are
     * never computed against an unset gradient/constant.
     *
     * @return array array(mean residual, standard deviation of residuals), as returned by sdv()
     */
    public function residuals()
    {
        if ($this->gradient === null || $this->constant === null) {
            $this->line();
        }

        $res = array();
        foreach ($this->input as $key => $row) {
            $model = $this->gradient * $row[0] + $this->constant;
            $this->input[$key][4] = $model;
            $this->input[$key][5] = $model - $row[1];
            $res[$key] = $this->input[$key][5];
        }

        return $this->sdv($res);
    }

    /**
     * Mean and population standard deviation (divides by n, not n - 1).
     *
     * @param array $array numeric values
     * @return array array(mean, standard deviation)
     */
    public function sdv($array)
    {
        $sumx = 0;
        $sumxx = 0;
        $n = count($array);

        foreach ($array as $in) {
            $sumx = $sumx + $in;
            $sumxx = $sumxx + $in * $in;
        }

        $xdivN = $sumx / $n;
        $xxdivN = $sumxx / $n;

        // Floating-point rounding can push the variance of (near-)constant data
        // a hair below zero, which would make the square root NAN; clamp to 0.
        $dsv = pow(max(0, $xxdivN - $xdivN * $xdivN), .5);

        return array($xdivN, $dsv);
    }

    /**
     * Draws the data points, the fitted line (red), the y = 0 line (blue) and
     * the x = 0 line (green) on a 600x450 image with a 500x350 plot area, and
     * writes it to "<plotname>.png". Requires the GD extension and the font
     * file 'Font.TTF'.
     *
     * @param string|null $title    chart title; "Title" when null
     * @param string|null $xtitle   x axis caption; "X Values" when null
     * @param string|null $ytitle   y axis caption; "Y Values" when null
     * @param string|null $plotname output file name without extension; "plot" when null
     */
    public function plot($title, $xtitle, $ytitle, $plotname)
    {
        // residuals() fits the line first if that has not been done yet.
        $this->residuals();
        $n = count($this->input);

        $plot = imagecreate(600, 450);
        // The first colour allocated on an imagecreate() image becomes its background.
        imagecolorallocate($plot, 239, 239, 239);
        $white = imagecolorallocate($plot, 255, 255, 255);
        $texcol = imagecolorallocate($plot, 0, 0, 0);
        $blue = imagecolorallocate($plot, 0, 0, 255);
        $red = imagecolorallocate($plot, 255, 0, 0);
        $green = imagecolorallocate($plot, 0, 255, 0);
        imagefilledrectangle($plot, 50, 50, 550, 400, $white); // plot area 500 x 350
        $font = 'Font.TTF';

        // Split into columns; keep the original order for plotting and a sorted
        // copy for min/max.
        $xx = array();
        $yy = array();
        foreach ($this->input as $in) {
            $xx[] = $in[0];
            $yy[] = $in[1];
        }
        $x = $xx;
        $y = $yy;
        sort($x, SORT_NUMERIC);
        sort($y, SORT_NUMERIC);

        $xmin = $x[0];
        $xmax = $x[$n - 1];
        $ymin = $y[0];
        $ymax = $y[$n - 1];

        // True spans are used for the axis labels; the scaling ranges fall back
        // to 1 so that data with identical x or y values does not divide by zero.
        $xspan = $xmax - $xmin;
        $yspan = $ymax - $ymin;
        $xrange = ($xspan == 0) ? 1 : $xspan;
        $yrange = ($yspan == 0) ? 1 : $yspan;

        // Data points. The x offset is 45 rather than 50, as in the original.
        // GD takes integer coordinates, so round explicitly.
        for ($i = 0; $i < $n; $i++) {
            $xpoint = (($xx[$i] - $xmin) / $xrange * 500) + 45;
            $ypoint = 400 - (($yy[$i] - $ymin) / $yrange * 350);
            imagettftext($plot, 10, 0, (int) round($xpoint), (int) round($ypoint), $texcol, $font, "x");
        }

        if (!isset($xtitle)) {
            $xtitle = "X Values";
        }
        imagettftext($plot, 15, 0, 250, 440, $texcol, $font, $xtitle);
        if (!isset($ytitle)) {
            $ytitle = "Y Values";
        }
        imagettftext($plot, 15, 90, 20, 240, $texcol, $font, $ytitle);
        if (!isset($title)) {
            $title = "Title";
        }
        imagettftext($plot, 20, 0, 100, 30, $texcol, $font, $title);
        imagettftext($plot, 10, 0, 450, 40, $blue, $font, "Gradient =" . round($this->gradient, 4));
        imagettftext($plot, 10, 0, 450, 20, $blue, $font, "Constant =" . round($this->constant, 4));

        // Six tick marks and labels on each axis.
        for ($i = 0; $i < 6; $i++) {
            imagettftext($plot, 5, 90, 50, 70 * $i + 50, $texcol, $font, "|");
            imagettftext($plot, 10, 90, 40, 450 - (70 * $i + 50), $texcol, $font, (string) round($i * $yspan / 5 + $ymin));
            imagettftext($plot, 5, 0, 50 + 100 * $i, 403, $texcol, $font, "|");
            imagettftext($plot, 10, 0, 50 + 100 * $i, 420, $texcol, $font, (string) round($i * $xspan / 5 + $xmin));
        }

        // Fitted line from the smallest to the largest x value.
        $s = $xmin * $this->gradient + $this->constant;
        $s = (int) round(400 - (($s - $ymin) / $yrange * 350));
        $t = $xmax * $this->gradient + $this->constant;
        $t = (int) round(400 - (($t - $ymin) / $yrange * 350));
        imagettftext($plot, 10, 0, 15, $s, $texcol, $font, round($this->constant, 2) . ",0");
        imageline($plot, 50, $s, 550, $t, $red);

        // Horizontal line at y = 0.
        $yzero = (int) round(400 - (-$ymin / $yrange * 350));
        imageline($plot, 50, $yzero, 550, $yzero, $blue);

        // Vertical line at x = 0. The original read an undefined $x0 here, which
        // evaluated as 0; the value is now explicit.
        $xo = (int) round(((0 - $xmin) / $xrange * 500) + 50);
        imageline($plot, $xo, 400, $xo, 50, $green);

        if (!isset($plotname)) {
            $plotname = "plot";
        }
        imagepng($plot, $plotname . ".png");
    }
}
?>
ASKER CERTIFIED SOLUTION
Avatar of aburr
aburr
Flag of United States of America image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
So that is effectively as good as multiplying the weight with the value of the dependent variable. Is that all that separates Linear Regression from Weighted Linear Regression?
I submitted an answer, the correctness of which was confirmed